`f128` symbols on powerpc64 give inaccurate results

For some reason, the `__addkf3` symbol linked by rustc says that `0x00000000000000000000000000000000 + 0x00000000000000000000000000000001 = 0x00000000000000000000000000000000`, when the answer should be `0x00000000000000000000000000000001`. Other symbols likely do the wrong thing here too.

Example:

```sh
$ powerpc64-linux-gnu-gcc add_test.c -o add_test.c.ppc64
$ rustc add_test.rs -o add_test.rs.ppc64 --target powerpc64-unknown-linux-gnu -Clinker=powerpc64-linux-gnu-gcc

$ qemu-ppc64 -L /usr/powerpc64-linux-gnu/ add_test.c.ppc64
$ qemu-ppc64 -L /usr/powerpc64-linux-gnu/ add_test.rs.ppc64

$ qemu-ppc64 -L /usr/powerpc64-linux-gnu/ add_test.c.ppc64
0000000000000000000000000000000000 0.000000
0000000000000000000000000000000001 0.000000
0000000000000000000000000000000001 0.000000

$ qemu-ppc64 -L /usr/powerpc64-linux-gnu/ add_test.rs.ppc64
[add_test.rs:12:5] a = 0x00000000000000000000000000000000
[add_test.rs:12:5] b = 0x00000000000000000000000000000001
[add_test.rs:14:5] c = 0x00000000000000000000000000000000
```

Our Rust code emits `__addkf3` which calls the hardware f128 addition routine `xsaddup`. The part I cannot explain is that the C version also emits `__addkf3` which _also_ calls `xsaddup`, but somehow produces a correct result. I checked with GDB to make sure they are both hitting `xsaddup`, so can't really explain the discrepancy.

Original notes at https://github.com/rust-lang/compiler-builtins/pull/606#issuecomment-2105657287, discussion on Zulip at https://rust-lang.zulipchat.com/#narrow/stream/122651-general/topic/f128.20system.20libraries.20noncompliant.20platforms/near/438486364.

Full code copied from https://github.com/rust-lang/compiler-builtins/pull/606#issuecomment-2106197914:

<details>
<summary>Rust code</summary>

```rust
#![feature(f128)]

#[no_mangle]
#[inline(never)]
fn add_entry(a: f128, b: f128) -> f128 {
    a + b
}

fn main() {
    let a = f128::from_bits(0x0);
    let b = f128::from_bits(0x1);
    dbg!(a, b);
    let c = add_entry(a, b);
    dbg!(c);
}
```
</details>

<details>
<summary>C code</summary>

```c
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>

#define _Float128 __float128

typedef struct {
#if __BYTE_ORDER == __LITTLE_ENDIAN
    uint64_t lower, upper;
#elif __BYTE_ORDER == __BIG_ENDIAN
    uint64_t upper, lower;
#else
#error missing endian check
#endif
} __attribute__((aligned(_Alignof(_Float128)))) u128;

_Float128 __addkf3(_Float128, _Float128);

void f128_print(_Float128 val) {
    u128 ival = *((u128 *)(&val));
    printf("%#018" PRIx64 "%016" PRIx64 " %lf\n", ival.upper, ival.lower, (double)val);
}

_Float128 new_f128(uint64_t upper, uint64_t lower) {
    u128 val = { .lower = lower, .upper = upper };
    return *((_Float128 *)(&val));
}

_Float128 add_entry(_Float128 a, _Float128 b) {
#ifdef USE_ADDKF3
  return __addkf3(a, b);
#else
  return a + b;
#endif
}

int main() {
    _Float128 a = new_f128(0x0000000000000000, 0x0000000000000000);
    _Float128 b = new_f128(0x0000000000000000, 0x0000000000000001);
    f128_print(a);
    f128_print(b);
    _Float128 c = add_entry(a, b);
    f128_print(c);

    return 0;
}
```
</details>

<details>
<summary>Assembly generated from Rust (incorrect result)</summary>

```asm
0000000000010d00 <.add_entry>:
   10d00:   7c 08 02 a6     mflr    r0
   10d04:   f8 21 ff 91     stdu    r1,-112(r1)
   10d08:   f8 01 00 80     std     r0,128(r1)
   10d0c:   4b ff c6 95     bl      d3a0 <00000143.plt_call.__addkf3>
   10d10:   e8 41 00 28     ld      r2,40(r1)
   10d14:   38 21 00 70     addi    r1,r1,112
   10d18:   e8 01 00 10     ld      r0,16(r1)
   10d1c:   7c 08 03 a6     mtlr    r0
   10d20:   4e 80 00 20     blr

000000000000d3a0 <00000143.plt_call.__addkf3>:
    d3a0:       f8 41 00 28     std     r2,40(r1)
    d3a4:       3d 62 ff ff     addis   r11,r2,-1
    d3a8:       e9 8b 7f 58     ld      r12,32600(r11)
    d3ac:       7d 89 03 a6     mtctr   r12
    d3b0:       e8 4b 7f 60     ld      r2,32608(r11)
    d3b4:       4e 80 04 20     bctr

0000000000056790 <.__addkf3_resolve>:
   56790:       81 2d 8f 9c     lwz     r9,-28772(r13)
   56794:       75 29 00 40     andis.  r9,r9,64
   56798:       41 82 00 18     beq     567b0 <.__addkf3_resolve+0x20>
   5679c:       e8 62 80 10     ld      r3,-32752(r2)
   567a0:       4e 80 00 20     blr
   567a4:       60 00 00 00     nop
   567a8:       60 00 00 00     nop
   567ac:       60 42 00 00     ori     r2,r2,0
   567b0:       e8 62 80 18     ld      r3,-32744(r2)
   567b4:       4e 80 00 20     blr
        ...
   567c4:       60 00 00 00     nop
   567c8:       60 00 00 00     nop
   567cc:       60 42 00 00     ori     r2,r2,0

000000000005e6e0 <.__addkf3_hw>:
   5e6e0:       fc 42 18 08     xsaddqp v2,v2,v3
   5e6e4:       4e 80 00 20     blr
        ...
   5e6f4:       60 00 00 00     nop
   5e6f8:       60 00 00 00     nop
   5e6fc:       60 00 00 00     nop

0000000000057050 <.__addkf3_sw>:
   57050:       fb 41 ff d0     std     r26,-48(r1)
   57054:       fb 61 ff d8     std     r27,-40(r1)
   57058:       fb 81 ff e0     std     r28,-32(r1)
   5705c:       fb a1 ff e8     std     r29,-24(r1)
   57060:       fb c1 ff f0     std     r30,-16(r1)
   57064:       fb e1 ff f8     std     r31,-8(r1)
   57068:       f8 21 ff 41     stdu    r1,-192(r1)
   5706c:       39 21 00 70     addi    r9,r1,112
   57070:       39 41 00 70     addi    r10,r1,112
   // ... full sw implementation
```
</details>

<details>
<summary>Assembly generated from C (correct result)</summary>

```asm
0000000010000af8 <.add_entry>:
    10000af8:   7c 08 02 a6     mflr    r0
    10000afc:   f8 01 00 10     std     r0,16(r1)
    10000b00:   fb e1 ff f8     std     r31,-8(r1)
    10000b04:   f8 21 ff 81     stdu    r1,-128(r1)
    10000b08:   7c 3f 0b 78     mr      r31,r1
    10000b0c:   39 20 00 30     li      r9,48
    10000b10:   39 5f 00 80     addi    r10,r31,128
    10000b14:   7c 4a 4f 99     stxvd2x vs34,r10,r9
    10000b18:   39 20 00 40     li      r9,64
    10000b1c:   39 5f 00 80     addi    r10,r31,128
    10000b20:   7c 6a 4f 99     stxvd2x vs35,r10,r9
    10000b24:   39 20 00 40     li      r9,64
    10000b28:   39 5f 00 80     addi    r10,r31,128
    10000b2c:   7c 6a 4e 99     lxvd2x  vs35,r10,r9
    10000b30:   39 20 00 30     li      r9,48
    10000b34:   39 5f 00 80     addi    r10,r31,128
    10000b38:   7c 4a 4e 99     lxvd2x  vs34,r10,r9
    10000b3c:   4b ff fb a5     bl      100006e0 <00000019.plt_call.__addkf3>
    10000b40:   e8 41 00 28     ld      r2,40(r1)
    10000b44:   f0 02 14 96     xxmr    vs0,vs34
    10000b48:   f0 40 04 91     xxmr    vs34,vs0
    10000b4c:   38 3f 00 80     addi    r1,r31,128
    10000b50:   e8 01 00 10     ld      r0,16(r1)
    10000b54:   7c 08 03 a6     mtlr    r0
    10000b58:   eb e1 ff f8     ld      r31,-8(r1)
    10000b5c:   4e 80 00 20     blr
    10000b60:   00 00 00 00     .long 0x0
    10000b64:   00 00 00 01     .long 0x1
    10000b68:   80 01 00 01     lwz     r0,1(r1)

0000000010000c40 <.__addkf3_resolve>:
    10000c40:   81 2d 8f 9c     lwz     r9,-28772(r13)
    10000c44:   75 29 00 40     andis.  r9,r9,64
    10000c48:   41 82 00 18     beq     10000c60 <.__addkf3_resolve+0x20>
    10000c4c:   e8 62 80 10     ld      r3,-32752(r2)
    10000c50:   4e 80 00 20     blr
    10000c54:   60 00 00 00     nop
    10000c58:   60 00 00 00     nop
    10000c5c:   60 42 00 00     ori     r2,r2,0
    10000c60:   e8 62 80 18     ld      r3,-32744(r2)
    10000c64:   4e 80 00 20     blr
        ...
    10000c74:   60 00 00 00     nop
    10000c78:   60 00 00 00     nop
    10000c7c:   60 42 00 00     ori     r2,r2,0

0000000010008b90 <.__addkf3_hw>:
    10008b90:   fc 42 18 08     xsaddqp v2,v2,v3
    10008b94:   4e 80 00 20     blr
        ...
    10008ba4:   60 00 00 00     nop
    10008ba8:   60 00 00 00     nop
    10008bac:   60 00 00 00     nop

0000000010001500 <.__addkf3_sw>:
    10001500:   fb 41 ff d0     std     r26,-48(r1)
    10001504:   fb 61 ff d8     std     r27,-40(r1)
    10001508:   fb 81 ff e0     std     r28,-32(r1)
    1000150c:   fb a1 ff e8     std     r29,-24(r1)
    10001510:   fb c1 ff f0     std     r30,-16(r1)
    10001514:   fb e1 ff f8     std     r31,-8(r1)
    10001518:   f8 21 ff 41     stdu    r1,-192(r1)
    1000151c:   39 21 00 70     addi    r9,r1,112
    10001520:   39 41 00 70     addi    r10,r1,112
    // ...
```
</details>

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

`f128` symbols on powerpc64 give inaccurate results #125109

2 remaining items

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Participants

f128 symbols on powerpc64 give inaccurate results #125109

Description

Activity

tgross35 commented on May 14, 2024

beetrees commented on May 16, 2024

tgross35 commented on May 16, 2024

beetrees commented on May 16, 2024

2 remaining items

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Participants

Issue actions

`f128` symbols on powerpc64 give inaccurate results #125109