Open
Description
rustc emits the following LLVM-IR:
Original IR
; ModuleID = 'test.6a688027-cgu.0'
source_filename = "test.6a688027-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; core::hint::unreachable_unchecked
; Function Attrs: inlinehint noreturn nonlazybind uwtable
define internal void @_ZN4core4hint21unreachable_unchecked17h79872ecff0660357E() unnamed_addr #0 {
start:
unreachable
}
; <core::num::nonzero::NonZeroU32 as core::cmp::PartialEq>::eq
; Function Attrs: inlinehint nonlazybind uwtable
define internal zeroext i1 @"_ZN71_$LT$core..num..nonzero..NonZeroU32$u20$as$u20$core..cmp..PartialEq$GT$2eq17hcc6145d807d01b45E"(i32* noalias readonly align 4 dereferenceable(4) %self, i32* noalias readonly align 4 dereferenceable(4) %other) unnamed_addr #1 {
start:
%_5 = load i32, i32* %self, align 4
%_6 = load i32, i32* %other, align 4
%0 = icmp eq i32 %_5, %_6
ret i1 %0
}
; test::disc
; Function Attrs: nonlazybind uwtable
define internal i8 @_ZN4test4disc17hd5bc60725a718a4dE(i32* noalias readonly align 4 dereferenceable(4) %a) unnamed_addr #2 {
start:
%0 = alloca i8, align 1
%1 = load i32, i32* %a, align 4
%2 = icmp eq i32 %1, 0
%_2 = select i1 %2, i64 0, i64 1
switch i64 %_2, label %bb2 [
i64 0, label %bb1
i64 1, label %bb3
]
bb2: ; preds = %start
unreachable
bb1: ; preds = %start
store i8 0, i8* %0, align 1
br label %bb4
bb3: ; preds = %start
store i8 1, i8* %0, align 1
br label %bb4
bb4: ; preds = %bb1, %bb3
%3 = load i8, i8* %0, align 1
ret i8 %3
}
; test::cmp3
; Function Attrs: nonlazybind uwtable
define zeroext i1 @_ZN4test4cmp317h6f7d157bae19d84dE(i32 %0, i32 %1) unnamed_addr #2 {
start:
%y = alloca i32, align 4
%x = alloca i32, align 4
%_10 = alloca { i32, i32 }, align 4
%2 = alloca i8, align 1
%b = alloca i32, align 4
%a = alloca i32, align 4
store i32 %0, i32* %a, align 4
store i32 %1, i32* %b, align 4
; call test::disc
%_4 = call i8 @_ZN4test4disc17hd5bc60725a718a4dE(i32* noalias readonly align 4 dereferenceable(4) %a)
br label %bb1
bb1: ; preds = %start
; call test::disc
%_7 = call i8 @_ZN4test4disc17hd5bc60725a718a4dE(i32* noalias readonly align 4 dereferenceable(4) %b)
br label %bb2
bb2: ; preds = %bb1
%_3 = icmp eq i8 %_4, %_7
br i1 %_3, label %bb3, label %bb11
bb11: ; preds = %bb2
store i8 0, i8* %2, align 1
br label %bb12
bb3: ; preds = %bb2
%3 = bitcast { i32, i32 }* %_10 to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %3)
%_11 = load i32, i32* %a, align 4
%_12 = load i32, i32* %b, align 4
%4 = bitcast { i32, i32 }* %_10 to i32*
store i32 %_11, i32* %4, align 4
%5 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %_10, i32 0, i32 1
store i32 %_12, i32* %5, align 4
%6 = bitcast { i32, i32 }* %_10 to i32*
%7 = load i32, i32* %6, align 4
%8 = icmp eq i32 %7, 0
%_15 = select i1 %8, i64 0, i64 1
switch i64 %_15, label %bb5 [
i64 0, label %bb4
i64 1, label %bb6
]
bb5: ; preds = %bb4, %bb6, %bb3
; call core::hint::unreachable_unchecked
call void @_ZN4core4hint21unreachable_unchecked17h79872ecff0660357E()
unreachable
bb4: ; preds = %bb3
%9 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %_10, i32 0, i32 1
%10 = load i32, i32* %9, align 4
%11 = icmp eq i32 %10, 0
%_13 = select i1 %11, i64 0, i64 1
%12 = icmp eq i64 %_13, 0
br i1 %12, label %bb9, label %bb5
bb6: ; preds = %bb3
%13 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %_10, i32 0, i32 1
%14 = load i32, i32* %13, align 4
%15 = icmp eq i32 %14, 0
%_14 = select i1 %15, i64 0, i64 1
%16 = icmp eq i64 %_14, 1
br i1 %16, label %bb7, label %bb5
bb7: ; preds = %bb6
%17 = bitcast i32* %x to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* %17)
%18 = bitcast { i32, i32 }* %_10 to i32*
%19 = load i32, i32* %18, align 4, !range !2
store i32 %19, i32* %x, align 4
%20 = bitcast i32* %y to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* %20)
%21 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %_10, i32 0, i32 1
%22 = load i32, i32* %21, align 4, !range !2
store i32 %22, i32* %y, align 4
; call <core::num::nonzero::NonZeroU32 as core::cmp::PartialEq>::eq
%23 = call zeroext i1 @"_ZN71_$LT$core..num..nonzero..NonZeroU32$u20$as$u20$core..cmp..PartialEq$GT$2eq17hcc6145d807d01b45E"(i32* noalias readonly align 4 dereferenceable(4) %x, i32* noalias readonly align 4 dereferenceable(4) %y)
%24 = zext i1 %23 to i8
store i8 %24, i8* %2, align 1
br label %bb8
bb8: ; preds = %bb7
%25 = bitcast i32* %y to i8*
call void @llvm.lifetime.end.p0i8(i64 4, i8* %25)
%26 = bitcast i32* %x to i8*
call void @llvm.lifetime.end.p0i8(i64 4, i8* %26)
br label %bb10
bb10: ; preds = %bb9, %bb8
%27 = bitcast { i32, i32 }* %_10 to i8*
call void @llvm.lifetime.end.p0i8(i64 8, i8* %27)
br label %bb12
bb9: ; preds = %bb4
store i8 1, i8* %2, align 1
br label %bb10
bb12: ; preds = %bb11, %bb10
%28 = load i8, i8* %2, align 1, !range !3
%29 = trunc i8 %28 to i1
ret i1 %29
}
; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3
attributes #0 = { inlinehint noreturn nonlazybind uwtable "probe-stack"="__rust_probestack" "target-cpu"="x86-64" }
attributes #1 = { inlinehint nonlazybind uwtable "probe-stack"="__rust_probestack" "target-cpu"="x86-64" }
attributes #2 = { nonlazybind uwtable "probe-stack"="__rust_probestack" "target-cpu"="x86-64" }
attributes #3 = { argmemonly nofree nosync nounwind willreturn }
!llvm.module.flags = !{!0, !1}
!0 = !{i32 7, !"PIC Level", i32 2}
!1 = !{i32 2, !"RtLibUseGOT", i32 1}
!2 = !{i32 1, i32 0}
!3 = !{i8 0, i8 2}
which after optimization becomes
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define zeroext i1 @_ZN4test4cmp317h6f7d157bae19d84dE(i32 %0, i32 %1) unnamed_addr #0 {
start:
%2 = icmp eq i32 %0, 0
%3 = icmp ne i32 %1, 0
%_3 = xor i1 %2, %3
br i1 %_3, label %bb3, label %bb12
bb3: ; preds = %start
br i1 %2, label %bb4, label %bb6
bb4: ; preds = %bb3
%4 = icmp eq i32 %1, 0
tail call void @llvm.assume(i1 %4)
br label %bb12
bb6: ; preds = %bb3
tail call void @llvm.assume(i1 %3)
%5 = icmp eq i32 %0, %1
br label %bb12
bb12: ; preds = %bb4, %bb6, %start
%.1 = phi i1 [ false, %start ], [ %5, %bb6 ], [ true, %bb4 ]
ret i1 %.1
}
declare void @llvm.assume(i1 noundef) #1
attributes #0 = { mustprogress nofree nosync nounwind nonlazybind readnone uwtable willreturn "probe-stack"="__rust_probestack" "target-cpu"="x86-64" }
attributes #1 = { inaccessiblememonly nofree nosync nounwind willreturn }
!llvm.module.flags = !{!0, !1}
!0 = !{i32 7, !"PIC Level", i32 2}
!1 = !{i32 2, !"RtLibUseGOT", i32 1}
I believe this is a missed optimization, and it would be correct to compile the function into a simple equality check on the inputs. This is however my first time reading llvm-ir, so I could also be wrong; please let me know if so.
I have tested this on both the rustc LLVM artifacts and on godbolt using trunk. If I should provide anymore metadata, please let me know. This example is the result of compiling fairly idiomatic Rust, and I can provide other similar (but probably ultimately equivalent) examples if needed.