Skip to content

Missed optimization for combination of conditionals #52622

Open
@JakobDegen

Description

@JakobDegen

rustc emits the following LLVM-IR:

Original IR
; ModuleID = 'test.6a688027-cgu.0'
source_filename = "test.6a688027-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; core::hint::unreachable_unchecked
; Function Attrs: inlinehint noreturn nonlazybind uwtable
; Takes no arguments and never returns: the body is a single `unreachable`
; terminator. In this module it is called only from bb5 of test::cmp3.
define internal void @_ZN4core4hint21unreachable_unchecked17h79872ecff0660357E() unnamed_addr #0 {
start:
  unreachable
}

; <core::num::nonzero::NonZeroU32 as core::cmp::PartialEq>::eq
; Function Attrs: inlinehint nonlazybind uwtable
; Loads the i32 behind each of the two pointer arguments and returns whether
; the two loaded values are equal.
define internal zeroext i1 @"_ZN71_$LT$core..num..nonzero..NonZeroU32$u20$as$u20$core..cmp..PartialEq$GT$2eq17hcc6145d807d01b45E"(i32* noalias readonly align 4 dereferenceable(4) %self, i32* noalias readonly align 4 dereferenceable(4) %other) unnamed_addr #1 {
start:
  %_5 = load i32, i32* %self, align 4
  %_6 = load i32, i32* %other, align 4
  %0 = icmp eq i32 %_5, %_6
  ret i1 %0
}

; test::disc
; Function Attrs: nonlazybind uwtable
; Returns i8 0 when the i32 behind %a is zero and i8 1 otherwise. The result
; is written to the stack slot %0 in bb1/bb3 and reloaded in bb4.
define internal i8 @_ZN4test4disc17hd5bc60725a718a4dE(i32* noalias readonly align 4 dereferenceable(4) %a) unnamed_addr #2 {
start:
  %0 = alloca i8, align 1
  %1 = load i32, i32* %a, align 4
  %2 = icmp eq i32 %1, 0
  ; %_2 is 0 when the loaded value is zero and 1 otherwise, so the switch
  ; below can only take one of its two explicit cases.
  %_2 = select i1 %2, i64 0, i64 1
  switch i64 %_2, label %bb2 [
    i64 0, label %bb1
    i64 1, label %bb3
  ]

; Default switch target; the select above never produces a value that lands here.
bb2:                                              ; preds = %start
  unreachable

; Loaded value was zero: result is 0.
bb1:                                              ; preds = %start
  store i8 0, i8* %0, align 1
  br label %bb4

; Loaded value was non-zero: result is 1.
bb3:                                              ; preds = %start
  store i8 1, i8* %0, align 1
  br label %bb4

bb4:                                              ; preds = %bb1, %bb3
  %3 = load i8, i8* %0, align 1
  ret i8 %3
}

; test::cmp3
; Function Attrs: nonlazybind uwtable
; Unoptimized comparison of two i32 arguments, in which zero is treated as a
; distinct case from every non-zero value:
;   1. test::disc is called on each argument; if the two results differ, the
;      function returns false (bb11).
;   2. Otherwise both arguments are copied into the pair %_10 and the code
;      branches on whether each field is zero. Both zero returns true (bb9);
;      both non-zero returns the result of NonZeroU32::eq (bb7).
;   3. The remaining combinations (one field zero, the other not) branch to
;      bb5, which calls core::hint::unreachable_unchecked.
; The boolean result is carried through the stack slot %2 and returned in bb12.
; NOTE(review): the Rust source is not shown; the shape suggests a comparison
; of two Option<NonZeroU32>-like values - confirm against the original code.
define zeroext i1 @_ZN4test4cmp317h6f7d157bae19d84dE(i32 %0, i32 %1) unnamed_addr #2 {
start:
  %y = alloca i32, align 4
  %x = alloca i32, align 4
  %_10 = alloca { i32, i32 }, align 4
  %2 = alloca i8, align 1
  %b = alloca i32, align 4
  %a = alloca i32, align 4
  ; Spill both arguments to the stack so they can be passed by pointer.
  store i32 %0, i32* %a, align 4
  store i32 %1, i32* %b, align 4
; call test::disc
  %_4 = call i8 @_ZN4test4disc17hd5bc60725a718a4dE(i32* noalias readonly align 4 dereferenceable(4) %a)
  br label %bb1

bb1:                                              ; preds = %start
; call test::disc
  %_7 = call i8 @_ZN4test4disc17hd5bc60725a718a4dE(i32* noalias readonly align 4 dereferenceable(4) %b)
  br label %bb2

; Compare the two disc results: equal continues to bb3, different goes to bb11.
bb2:                                              ; preds = %bb1
  %_3 = icmp eq i8 %_4, %_7
  br i1 %_3, label %bb3, label %bb11

; disc results differ: the result is false.
bb11:                                             ; preds = %bb2
  store i8 0, i8* %2, align 1
  br label %bb12

; disc results match: build the pair (a, b) in %_10 and branch on whether its
; first field is zero.
bb3:                                              ; preds = %bb2
  %3 = bitcast { i32, i32 }* %_10 to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* %3)
  %_11 = load i32, i32* %a, align 4
  %_12 = load i32, i32* %b, align 4
  %4 = bitcast { i32, i32 }* %_10 to i32*
  store i32 %_11, i32* %4, align 4
  %5 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %_10, i32 0, i32 1
  store i32 %_12, i32* %5, align 4
  %6 = bitcast { i32, i32 }* %_10 to i32*
  %7 = load i32, i32* %6, align 4
  %8 = icmp eq i32 %7, 0
  %_15 = select i1 %8, i64 0, i64 1
  switch i64 %_15, label %bb5 [
    i64 0, label %bb4
    i64 1, label %bb6
  ]

; Taken when the two fields disagree about being zero (from bb4/bb6), and also
; the default target of the switch in bb3.
bb5:                                              ; preds = %bb4, %bb6, %bb3
; call core::hint::unreachable_unchecked
  call void @_ZN4core4hint21unreachable_unchecked17h79872ecff0660357E()
  unreachable

; First field is zero: continue to bb9 if the second field is zero as well,
; otherwise go to bb5.
bb4:                                              ; preds = %bb3
  %9 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %_10, i32 0, i32 1
  %10 = load i32, i32* %9, align 4
  %11 = icmp eq i32 %10, 0
  %_13 = select i1 %11, i64 0, i64 1
  %12 = icmp eq i64 %_13, 0
  br i1 %12, label %bb9, label %bb5

; First field is non-zero: continue to bb7 if the second field is non-zero as
; well, otherwise go to bb5.
bb6:                                              ; preds = %bb3
  %13 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %_10, i32 0, i32 1
  %14 = load i32, i32* %13, align 4
  %15 = icmp eq i32 %14, 0
  %_14 = select i1 %15, i64 0, i64 1
  %16 = icmp eq i64 %_14, 1
  br i1 %16, label %bb7, label %bb5

; Both fields are non-zero: copy them into %x and %y and compare them with
; NonZeroU32::eq. The loads carry !range !2, i.e. !{i32 1, i32 0}, a wrapping
; range that excludes only the value 0.
bb7:                                              ; preds = %bb6
  %17 = bitcast i32* %x to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* %17)
  %18 = bitcast { i32, i32 }* %_10 to i32*
  %19 = load i32, i32* %18, align 4, !range !2
  store i32 %19, i32* %x, align 4
  %20 = bitcast i32* %y to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* %20)
  %21 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %_10, i32 0, i32 1
  %22 = load i32, i32* %21, align 4, !range !2
  store i32 %22, i32* %y, align 4
; call <core::num::nonzero::NonZeroU32 as core::cmp::PartialEq>::eq
  %23 = call zeroext i1 @"_ZN71_$LT$core..num..nonzero..NonZeroU32$u20$as$u20$core..cmp..PartialEq$GT$2eq17hcc6145d807d01b45E"(i32* noalias readonly align 4 dereferenceable(4) %x, i32* noalias readonly align 4 dereferenceable(4) %y)
  %24 = zext i1 %23 to i8
  store i8 %24, i8* %2, align 1
  br label %bb8

; End the lifetimes of %y and %x in reverse order of their start.
bb8:                                              ; preds = %bb7
  %25 = bitcast i32* %y to i8*
  call void @llvm.lifetime.end.p0i8(i64 4, i8* %25)
  %26 = bitcast i32* %x to i8*
  call void @llvm.lifetime.end.p0i8(i64 4, i8* %26)
  br label %bb10

; Common exit for the paths that used the pair: end the lifetime of %_10.
bb10:                                             ; preds = %bb9, %bb8
  %27 = bitcast { i32, i32 }* %_10 to i8*
  call void @llvm.lifetime.end.p0i8(i64 8, i8* %27)
  br label %bb12

; Both fields are zero: the result is true.
bb9:                                              ; preds = %bb4
  store i8 1, i8* %2, align 1
  br label %bb10

; Reload the result byte (!range !3 is !{i8 0, i8 2}, so it is 0 or 1) and
; return it as an i1.
bb12:                                             ; preds = %bb11, %bb10
  %28 = load i8, i8* %2, align 1, !range !3
  %29 = trunc i8 %28 to i1
  ret i1 %29
}

; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3

; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3

attributes #0 = { inlinehint noreturn nonlazybind uwtable "probe-stack"="__rust_probestack" "target-cpu"="x86-64" }
attributes #1 = { inlinehint nonlazybind uwtable "probe-stack"="__rust_probestack" "target-cpu"="x86-64" }
attributes #2 = { nonlazybind uwtable "probe-stack"="__rust_probestack" "target-cpu"="x86-64" }
attributes #3 = { argmemonly nofree nosync nounwind willreturn }

!llvm.module.flags = !{!0, !1}

!0 = !{i32 7, !"PIC Level", i32 2}
!1 = !{i32 2, !"RtLibUseGOT", i32 1}
!2 = !{i32 1, i32 0}
!3 = !{i8 0, i8 2}

which after optimization becomes

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; test::cmp3 after optimization. Case analysis of the three paths into bb12:
;   - from start: exactly one argument is zero, so the arguments differ, and
;     the phi yields false;
;   - from bb4:   both arguments are zero, and the phi yields true;
;   - from bb6:   both arguments are non-zero, and the phi yields %0 == %1.
; In every case the returned value equals `icmp eq i32 %0, %1`.
define zeroext i1 @_ZN4test4cmp317h6f7d157bae19d84dE(i32 %0, i32 %1) unnamed_addr #0 {
start:
  ; %2: first argument is zero. %3: second argument is non-zero.
  %2 = icmp eq i32 %0, 0
  %3 = icmp ne i32 %1, 0
  ; The xor is true exactly when both arguments are zero or both are non-zero.
  %_3 = xor i1 %2, %3
  br i1 %_3, label %bb3, label %bb12

; Both arguments agree about being zero; pick the matching case.
bb3:                                              ; preds = %start
  br i1 %2, label %bb4, label %bb6

; First argument is zero; the assume records that the second is zero too.
bb4:                                              ; preds = %bb3
  %4 = icmp eq i32 %1, 0
  tail call void @llvm.assume(i1 %4)
  br label %bb12

; First argument is non-zero; the assume records that the second is non-zero
; too, then the two values are compared directly.
bb6:                                              ; preds = %bb3
  tail call void @llvm.assume(i1 %3)
  %5 = icmp eq i32 %0, %1
  br label %bb12

bb12:                                             ; preds = %bb4, %bb6, %start
  %.1 = phi i1 [ false, %start ], [ %5, %bb6 ], [ true, %bb4 ]
  ret i1 %.1
}

declare void @llvm.assume(i1 noundef) #1

attributes #0 = { mustprogress nofree nosync nounwind nonlazybind readnone uwtable willreturn "probe-stack"="__rust_probestack" "target-cpu"="x86-64" }
attributes #1 = { inaccessiblememonly nofree nosync nounwind willreturn }

!llvm.module.flags = !{!0, !1}

!0 = !{i32 7, !"PIC Level", i32 2}
!1 = !{i32 2, !"RtLibUseGOT", i32 1}

I believe this is a missed optimization: it would be correct to compile the function into a simple equality check on the inputs. This is, however, my first time reading LLVM IR, so I could be wrong; please let me know if so.

I have tested this on both the rustc LLVM artifacts and on godbolt using trunk. If I should provide any more metadata, please let me know. This example is the result of compiling fairly idiomatic Rust, and I can provide other similar (but probably ultimately equivalent) examples if needed.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions