diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index e7a1f07c0270d..32263a3fad6bb 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -9585,29 +9585,72 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
       bool HasNSW = IIQ.hasNoSignedWrap(&BO);
       bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);
-      // If the caller expects a signed compare, then try to use a signed range.
-      // Otherwise if both no-wraps are set, use the unsigned range because it
-      // is never larger than the signed range. Example:
-      // "sub nuw nsw i8 -2, x" is unsigned [0, 254] vs. signed [-128, 126].
-      // "sub nuw nsw i8 2, x" is unsigned [0, 2] vs. signed [-125, 127].
-      if (PreferSignedRange && HasNSW && HasNUW)
-        HasNUW = false;
-
-      if (HasNUW) {
-        // 'sub nuw c, x' produces [0, C].
-        Upper = *C + 1;
-      } else if (HasNSW) {
+      // Build the two candidate ranges as inclusive [lo..hi]:
+      //   unsignedRange: NUW => [0 .. C]
+      //   signedRange:   NSW => either [SINT_MIN .. -C - SINT_MIN]
+      //                         or [C - SINT_MAX .. SINT_MAX]
+      auto makeUnsignedRange = [&]() {
+        return std::pair(APInt::getZero(Width), *C);
+      };
+      auto makeSignedRange = [&]() {
         if (C->isNegative()) {
-          // 'sub nsw -C, x' produces [SINT_MIN, -C - SINT_MIN].
-          Lower = APInt::getSignedMinValue(Width);
-          Upper = *C - APInt::getSignedMaxValue(Width);
+          // sub nsw -C, x
+          APInt lo = APInt::getSignedMinValue(Width);
+          APInt hi = *C - APInt::getSignedMinValue(Width);
+          return std::pair(lo, hi);
+        } else {
+          // sub nsw C, x
+          APInt lo = *C - APInt::getSignedMaxValue(Width);
+          APInt hi = APInt::getSignedMaxValue(Width);
+          return std::pair(lo, hi);
+        }
+      };
+
+      // Split a (possibly wrapping) [lo..hi] into up to two non-wrapping
+      // pieces:
+      auto splitPieces = [&](std::pair<APInt, APInt> rng,
+                             SmallVectorImpl<std::pair<APInt, APInt>> &pieces) {
+        APInt lo = rng.first, hi = rng.second;
+        if (lo.ugt(hi)) {
+          // Wraps around 2^n.
+          pieces.emplace_back(lo, APInt::getMaxValue(Width)); // [lo..2^n-1]
+          pieces.emplace_back(APInt::getZero(Width), hi);     // [0..hi]
         } else {
-          // Note that sub 0, INT_MIN is not NSW. It techically is a signed wrap
-          // 'sub nsw C, x' produces [C - SINT_MAX, SINT_MAX].
-          Lower = *C - APInt::getSignedMaxValue(Width);
-          Upper = APInt::getSignedMinValue(Width);
+          pieces.emplace_back(lo, hi);
+        }
+      };
+
+      SmallVector<std::pair<APInt, APInt>, 2> piecesU, piecesS;
+      if (HasNUW)
+        splitPieces(makeUnsignedRange(), piecesU);
+      if (HasNSW)
+        splitPieces(makeSignedRange(), piecesS);
+
+      // Intersect piecewise:
+      SmallVector<std::pair<APInt, APInt>, 2> inters;
+      for (auto &u : piecesU) {
+        for (auto &s : piecesS) {
+          APInt loI = u.first.ugt(s.first) ? u.first : s.first;
+          APInt hiI = u.second.ult(s.second) ? u.second : s.second;
+          if (loI.ule(hiI))
+            inters.emplace_back(loI, hiI);
         }
       }
+
+      if (inters.size() == 1) {
+        // Exactly one contiguous overlap: use it (Upper is exclusive).
+        Lower = inters[0].first;
+        Upper = inters[0].second + 1;
+      } else if (HasNUW && !(PreferSignedRange && HasNSW)) {
+        // Fall back to the plain NUW result [0, C].
+        Lower = APInt::getZero(Width);
+        Upper = *C + 1;
+      } else if (HasNSW) {
+        // Fall back to the plain NSW result.
+        auto S = makeSignedRange();
+        Lower = S.first;
+        Upper = S.second + 1;
+      }
     }
     break;
   case Instruction::Add:
@@ -9615,26 +9658,74 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
       bool HasNSW = IIQ.hasNoSignedWrap(&BO);
       bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);
-      // If the caller expects a signed compare, then try to use a signed
-      // range. Otherwise if both no-wraps are set, use the unsigned range
-      // because it is never larger than the signed range. Example: "add nuw
-      // nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125].
-      if (PreferSignedRange && HasNSW && HasNUW)
-        HasNUW = false;
+      // Build the two candidate ranges as inclusive [lo..hi] in the
+      // unsigned 0..2^n-1 world:
+      //   NUW: 'add nuw x, C' => [C .. UINT_MAX]
+      auto makeUnsignedRange = [&]() {
+        APInt lo = *C;
+        APInt hi = APInt::getMaxValue(Width);
+        return std::pair(lo, hi);
+      };
-      if (HasNUW) {
-        // 'add nuw x, C' produces [C, UINT_MAX].
-        Lower = *C;
-      } else if (HasNSW) {
+      //   NSW: 'add nsw x, C'
+      //     if C < 0: [SINT_MIN .. SINT_MAX + C]
+      //     else:     [SINT_MIN + C .. SINT_MAX]
+      auto makeSignedRange = [&]() {
         if (C->isNegative()) {
-          // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
-          Lower = APInt::getSignedMinValue(Width);
-          Upper = APInt::getSignedMaxValue(Width) + *C + 1;
+          APInt lo = APInt::getSignedMinValue(Width);
+          APInt hi = APInt::getSignedMaxValue(Width) + *C;
+          return std::pair(lo, hi);
         } else {
-          // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
-          Lower = APInt::getSignedMinValue(Width) + *C;
-          Upper = APInt::getSignedMaxValue(Width) + 1;
+          APInt lo = APInt::getSignedMinValue(Width) + *C;
+          APInt hi = APInt::getSignedMaxValue(Width);
+          return std::pair(lo, hi);
         }
+      };
+
+      // Split [lo..hi] into up to two non-wrapping intervals:
+      auto splitPieces = [&](std::pair<APInt, APInt> rng,
+                             SmallVectorImpl<std::pair<APInt, APInt>> &dst) {
+        APInt lo = rng.first, hi = rng.second;
+        if (lo.ugt(hi)) {
+          // Wraps around 2^n.
+          dst.emplace_back(lo, APInt::getMaxValue(Width));
+          dst.emplace_back(APInt::getZero(Width), hi);
+        } else {
+          dst.emplace_back(lo, hi);
+        }
+      };
+
+      SmallVector<std::pair<APInt, APInt>, 2> piecesU, piecesS;
+      if (HasNUW)
+        splitPieces(makeUnsignedRange(), piecesU);
+      if (HasNSW)
+        splitPieces(makeSignedRange(), piecesS);
+
+      // Intersect piecewise.
+      SmallVector<std::pair<APInt, APInt>, 2> inters;
+      for (auto &u : piecesU) {
+        for (auto &s : piecesS) {
+          APInt loI = u.first.ugt(s.first) ? u.first : s.first;
+          APInt hiI = u.second.ult(s.second) ? u.second : s.second;
+          if (loI.ule(hiI))
+            inters.emplace_back(loI, hiI);
+        }
+      }
+
+      if (inters.size() == 1) {
+        // Exactly one contiguous overlap: use it. Upper is exclusive,
+        // following the [Lower, Upper) convention used here.
+        Lower = inters[0].first;
+        Upper = inters[0].second + 1;
+      } else if (HasNUW && !(PreferSignedRange && HasNSW)) {
+        // Fall back to the plain NUW result [C, UINT_MAX].
+        Lower = *C;
+        Upper = APInt::getMaxValue(Width) + 1;
+      } else if (HasNSW) {
+        // Fall back to the plain NSW result.
+        auto S = makeSignedRange();
+        Lower = S.first;
+        Upper = S.second + 1;
       }
     }
     break;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index 77e713256d247..ba0a222420bda 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -1412,8 +1412,7 @@ define void @interleave_deinterleave_factor3(ptr writeonly noalias %dst, ptr rea
 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
-; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
 ;
 entry:
@@ -1502,8 +1501,7 @@ define void @interleave_deinterleave(ptr writeonly noalias %dst, ptr readonly %a
 ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
-; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
 ;
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
index 3567aff0ace4e..6e798dae78f71 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
@@ -67,8 +67,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; SCALAR_TAIL_FOLDING: middle.block:
-; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
-; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; SCALAR_TAIL_FOLDING-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; SCALAR_TAIL_FOLDING: scalar.ph:
 ;
 ; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided1
@@ -205,8 +204,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; SCALAR_TAIL_FOLDING: middle.block:
-; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
-; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; SCALAR_TAIL_FOLDING-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; SCALAR_TAIL_FOLDING: scalar.ph:
 ;
 ; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided2
@@ -335,8 +333,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; SCALAR_TAIL_FOLDING: middle.block:
-; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
-; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; SCALAR_TAIL_FOLDING-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; SCALAR_TAIL_FOLDING: scalar.ph:
 ;
 ; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided3
@@ -490,8 +487,7 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p,
 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; SCALAR_TAIL_FOLDING: middle.block:
-; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
-; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; SCALAR_TAIL_FOLDING-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; SCALAR_TAIL_FOLDING: scalar.ph:
 ;
 ; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided_factor4
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
index 1b0feef3e6664..341cef691b6dc 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
@@ -52,8 +52,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali
 ; SCALAR_EPILOGUE-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALAR_EPILOGUE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; SCALAR_EPILOGUE: middle.block:
-; SCALAR_EPILOGUE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
-; SCALAR_EPILOGUE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; SCALAR_EPILOGUE-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; SCALAR_EPILOGUE: scalar.ph:
 ;
 ; PREDICATED_TAIL_FOLDING-LABEL: define void @masked_strided_factor2
@@ -237,8 +236,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali
 ; SCALAR_EPILOGUE-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALAR_EPILOGUE-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; SCALAR_EPILOGUE: middle.block:
-; SCALAR_EPILOGUE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
-; SCALAR_EPILOGUE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; SCALAR_EPILOGUE-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; SCALAR_EPILOGUE: scalar.ph:
 ;
 ; PREDICATED_TAIL_FOLDING-LABEL: define void @masked_strided_factor4
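
For context on the ValueTracking.cpp hunks above, here is a minimal standalone sketch of the same piecewise intersection, written over plain 8-bit values instead of APInt. It is an illustration only: the Interval alias and the splitPieces/intersectPieces helpers are names invented for this sketch, not LLVM APIs, and the patch itself works on APInt pairs exactly as shown in the diff.

// Standalone sketch of the piecewise intersection used in the patch, on
// plain 8-bit values. Intervals are inclusive [lo, hi] and may wrap around
// 2^8; a wrapping interval is split into two non-wrapping pieces before
// intersecting. Interval/splitPieces/intersectPieces are illustrative
// names only, not LLVM APIs.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

using Interval = std::pair<uint8_t, uint8_t>; // inclusive [lo, hi]

// Split a possibly wrapping interval into up to two non-wrapping pieces.
static std::vector<Interval> splitPieces(Interval R) {
  if (R.first > R.second)
    return {{R.first, 0xFF}, {0x00, R.second}}; // wraps around 2^8
  return {R};
}

// Intersect two piece lists; every non-empty pairwise overlap is kept.
static std::vector<Interval> intersectPieces(const std::vector<Interval> &A,
                                             const std::vector<Interval> &B) {
  std::vector<Interval> Out;
  for (const Interval &U : A)
    for (const Interval &S : B) {
      uint8_t Lo = std::max(U.first, S.first);
      uint8_t Hi = std::min(U.second, S.second);
      if (Lo <= Hi)
        Out.push_back({Lo, Hi});
    }
  return Out;
}

int main() {
  // 'sub nuw nsw i8 -2, x' from the removed comment: the NUW range is
  // unsigned [0, 254] and the NSW range is signed [-128, 126], which as
  // unsigned 8-bit values is the wrapping interval [0x80, 0x7E].
  std::vector<Interval> U = splitPieces({0x00, 0xFE});
  std::vector<Interval> S = splitPieces({0x80, 0x7E});
  for (const Interval &I : intersectPieces(U, S))
    std::printf("[0x%02X, 0x%02X]\n", (unsigned)I.first, (unsigned)I.second);
  // Prints [0x80, 0xFE] and [0x00, 0x7E].
  return 0;
}

Running it on that example yields two disjoint pieces, [0x80, 0xFE] and [0x00, 0x7E]. A single [Lower, Upper) pair cannot describe that set exactly, which is why the patch keeps the single-overlap fast path and otherwise falls back to one of the plain NUW/NSW ranges.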