Skip to content

Commit 039d887

Browse files
committed Jan 25, 2024
Auto merge of rust-lang#119911 - NCGThompson:is-statically-known, r=oli-obk
Replacement of rust-lang#114390: Add new intrinsic `is_val_statically_known` and optimize pow for powers of two. This adds a new intrinsic `is_val_statically_known` that lowers to [`@llvm.is.constant.*`](https://llvm.org/docs/LangRef.html#llvm-is-constant-intrinsic). It also applies the intrinsic in the int_pow methods to recognize and optimize the idiom `2isize.pow(x)`. See rust-lang#114390 for more discussion. While I have extended the scope of the power of two optimization from rust-lang#114390, I haven't added any new uses for the intrinsic. That can be done in later pull requests. Note: When testing or using the library, be sure to use `--stage 1` or higher. Otherwise, the intrinsic will be a noop and the doctests will be skipped. If you are trying out edits, you may be interested in [`--keep-stage 0`](https://rustc-dev-guide.rust-lang.org/building/suggested.html#faster-builds-with---keep-stage). Fixes rust-lang#47234 Resolves rust-lang#114390 `@Centri3`

File tree

17 files changed

+632
-152
lines changed

17 files changed

+632
-152
lines changed
 

‎compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,12 @@ fn codegen_regular_intrinsic_call<'tcx>(
443443

444444
ret.write_cvalue(fx, a);
445445
}
446+
sym::is_val_statically_known => {
447+
intrinsic_args!(fx, args => (_a); intrinsic);
448+
449+
let res = fx.bcx.ins().iconst(types::I8, 0);
450+
ret.write_cvalue(fx, CValue::by_val(res, ret.layout()));
451+
}
446452
sym::breakpoint => {
447453
intrinsic_args!(fx, args => (); intrinsic);
448454

‎compiler/rustc_codegen_gcc/src/context.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,15 +196,16 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
196196

197197
let mut functions = FxHashMap::default();
198198
let builtins = [
199-
"__builtin_unreachable", "abort", "__builtin_expect", "__builtin_add_overflow", "__builtin_mul_overflow",
200-
"__builtin_saddll_overflow", /*"__builtin_sadd_overflow",*/ "__builtin_smulll_overflow", /*"__builtin_smul_overflow",*/
199+
"__builtin_unreachable", "abort", "__builtin_expect", /*"__builtin_expect_with_probability",*/
200+
"__builtin_constant_p", "__builtin_add_overflow", "__builtin_mul_overflow", "__builtin_saddll_overflow",
201+
/*"__builtin_sadd_overflow",*/ "__builtin_smulll_overflow", /*"__builtin_smul_overflow",*/
201202
"__builtin_ssubll_overflow", /*"__builtin_ssub_overflow",*/ "__builtin_sub_overflow", "__builtin_uaddll_overflow",
202203
"__builtin_uadd_overflow", "__builtin_umulll_overflow", "__builtin_umul_overflow", "__builtin_usubll_overflow",
203204
"__builtin_usub_overflow", "sqrtf", "sqrt", "__builtin_powif", "__builtin_powi", "sinf", "sin", "cosf", "cos",
204205
"powf", "pow", "expf", "exp", "exp2f", "exp2", "logf", "log", "log10f", "log10", "log2f", "log2", "fmaf",
205206
"fma", "fabsf", "fabs", "fminf", "fmin", "fmaxf", "fmax", "copysignf", "copysign", "floorf", "floor", "ceilf",
206207
"ceil", "truncf", "trunc", "rintf", "rint", "nearbyintf", "nearbyint", "roundf", "round",
207-
"__builtin_expect_with_probability",
208+
208209
];
209210

210211
for builtin in builtins.iter() {

‎compiler/rustc_codegen_gcc/src/intrinsic/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,12 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
123123
sym::unlikely => {
124124
self.expect(args[0].immediate(), false)
125125
}
126+
sym::is_val_statically_known => {
127+
let a = args[0].immediate();
128+
let builtin = self.context.get_builtin_function("__builtin_constant_p");
129+
let res = self.context.new_call(None, builtin, &[a]);
130+
self.icmp(IntPredicate::IntEQ, res, self.const_i32(0))
131+
}
126132
kw::Try => {
127133
try_intrinsic(
128134
self,

‎compiler/rustc_codegen_llvm/src/context.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,20 @@ impl<'ll> CodegenCx<'ll, '_> {
916916
ifn!("llvm.lifetime.start.p0i8", fn(t_i64, ptr) -> void);
917917
ifn!("llvm.lifetime.end.p0i8", fn(t_i64, ptr) -> void);
918918

919+
// FIXME: This is an infinitesimally small portion of the types you can
920+
// pass to this intrinsic, if we can ever lazily register intrinsics we
921+
// should register these when they're used, that way any type can be
922+
// passed.
923+
ifn!("llvm.is.constant.i1", fn(i1) -> i1);
924+
ifn!("llvm.is.constant.i8", fn(t_i8) -> i1);
925+
ifn!("llvm.is.constant.i16", fn(t_i16) -> i1);
926+
ifn!("llvm.is.constant.i32", fn(t_i32) -> i1);
927+
ifn!("llvm.is.constant.i64", fn(t_i64) -> i1);
928+
ifn!("llvm.is.constant.i128", fn(t_i128) -> i1);
929+
ifn!("llvm.is.constant.isize", fn(t_isize) -> i1);
930+
ifn!("llvm.is.constant.f32", fn(t_f32) -> i1);
931+
ifn!("llvm.is.constant.f64", fn(t_f64) -> i1);
932+
919933
ifn!("llvm.expect.i1", fn(i1, i1) -> i1);
920934
ifn!("llvm.eh.typeid.for", fn(ptr) -> t_i32);
921935
ifn!("llvm.localescape", fn(...) -> void);

‎compiler/rustc_codegen_llvm/src/intrinsic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,10 @@ impl<'ll, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'_, 'll, 'tcx> {
119119
sym::likely => {
120120
self.call_intrinsic("llvm.expect.i1", &[args[0].immediate(), self.const_bool(true)])
121121
}
122+
sym::is_val_statically_known => self.call_intrinsic(
123+
&format!("llvm.is.constant.{:?}", args[0].layout.immediate_llvm_type(self.cx)),
124+
&[args[0].immediate()],
125+
),
122126
sym::unlikely => self
123127
.call_intrinsic("llvm.expect.i1", &[args[0].immediate(), self.const_bool(false)]),
124128
kw::Try => {

‎compiler/rustc_const_eval/src/const_eval/machine.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,11 @@ impl<'mir, 'tcx> interpret::Machine<'mir, 'tcx> for CompileTimeInterpreter<'mir,
531531
)?;
532532
}
533533
}
534+
// The intrinsic represents whether the value is known to the optimizer (LLVM).
535+
// We're not doing any optimizations here, so there is no optimizer that could know the value.
536+
// (We know the value here in the machine of course, but this is the runtime of that code,
537+
// not the optimization stage.)
538+
sym::is_val_statically_known => ecx.write_scalar(Scalar::from_bool(false), dest)?,
534539
_ => {
535540
throw_unsup_format!(
536541
"intrinsic `{intrinsic_name}` is not supported at compile-time"

‎compiler/rustc_hir_analysis/src/check/intrinsic.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,8 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
453453

454454
sym::black_box => (1, vec![param(0)], param(0)),
455455

456+
sym::is_val_statically_known => (1, vec![param(0)], tcx.types.bool),
457+
456458
sym::const_eval_select => (4, vec![param(0), param(1), param(2)], param(3)),
457459

458460
sym::vtable_size | sym::vtable_align => {

‎compiler/rustc_span/src/symbol.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,7 @@ symbols! {
910910
io_stderr,
911911
io_stdout,
912912
irrefutable_let_patterns,
913+
is_val_statically_known,
913914
isa_attribute,
914915
isize,
915916
issue,

‎library/core/src/intrinsics.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2517,6 +2517,66 @@ extern "rust-intrinsic" {
25172517
where
25182518
G: FnOnce<ARG, Output = RET>,
25192519
F: FnOnce<ARG, Output = RET>;
2520+
2521+
/// Returns whether the argument's value is statically known at
2522+
/// compile-time.
2523+
///
2524+
/// This is useful when there is a way of writing the code that will
2525+
/// be *faster* when some variables have known values, but *slower*
2526+
/// in the general case: an `if is_val_statically_known(var)` can be used
2527+
/// to select between these two variants. The `if` will be optimized away
2528+
/// and only the desired branch remains.
2529+
///
2530+
/// Formally speaking, this function non-deterministically returns `true`
2531+
/// or `false`, and the caller has to ensure sound behavior for both cases.
2532+
/// In other words, the following code has *Undefined Behavior*:
2533+
///
2534+
/// ```
2535+
/// #![feature(is_val_statically_known)]
2536+
/// #![feature(core_intrinsics)]
2537+
/// # #![allow(internal_features)]
2538+
/// use std::hint::unreachable_unchecked;
2539+
/// use std::intrinsics::is_val_statically_known;
2540+
///
2541+
/// unsafe {
2542+
/// if !is_val_statically_known(0) { unreachable_unchecked(); }
2543+
/// }
2544+
/// ```
2545+
///
2546+
/// This also means that the following code's behavior is unspecified; it
2547+
/// may panic, or it may not:
2548+
///
2549+
/// ```no_run
2550+
/// #![feature(is_val_statically_known)]
2551+
/// #![feature(core_intrinsics)]
2552+
/// # #![allow(internal_features)]
2553+
/// use std::intrinsics::is_val_statically_known;
2554+
///
2555+
/// unsafe {
2556+
/// assert_eq!(is_val_statically_known(0), is_val_statically_known(0));
2557+
/// }
2558+
/// ```
2559+
///
2560+
/// Unsafe code may not rely on `is_val_statically_known` returning any
2561+
/// particular value, ever. However, the compiler will generally make it
2562+
/// return `true` only if the value of the argument is actually known.
2563+
///
2564+
/// When calling this in a `const fn`, both paths must be semantically
2565+
/// equivalent, that is, the result of the `true` branch and the `false`
2566+
/// branch must return the same value and have the same side-effects *no
2567+
/// matter what*.
2568+
#[rustc_const_unstable(feature = "is_val_statically_known", issue = "none")]
2569+
#[rustc_nounwind]
2570+
#[cfg(not(bootstrap))]
2571+
pub fn is_val_statically_known<T: Copy>(arg: T) -> bool;
2572+
}
2573+
2574+
// FIXME: Seems using `unstable` here completely ignores `rustc_allow_const_fn_unstable`
2575+
// and thus compiling stage0 core doesn't work.
2576+
#[rustc_const_stable(feature = "is_val_statically_known", since = "0.0.0")]
2577+
#[cfg(bootstrap)]
2578+
pub const unsafe fn is_val_statically_known<T: Copy>(_arg: T) -> bool {
2579+
false
25202580
}
25212581

25222582
// Some functions are defined here because they accidentally got made

‎library/core/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@
200200
//
201201
// Language features:
202202
// tidy-alphabetical-start
203+
#![cfg_attr(not(bootstrap), feature(is_val_statically_known))]
203204
#![feature(abi_unadjusted)]
204205
#![feature(adt_const_params)]
205206
#![feature(allow_internal_unsafe)]

‎library/core/src/num/int_macros.rs

Lines changed: 210 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1374,26 +1374,59 @@ macro_rules! int_impl {
13741374
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
13751375
#[must_use = "this returns the result of the operation, \
13761376
without modifying the original"]
1377+
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
13771378
#[inline]
13781379
pub const fn checked_pow(self, mut exp: u32) -> Option<Self> {
1379-
if exp == 0 {
1380-
return Some(1);
1381-
}
1382-
let mut base = self;
1383-
let mut acc: Self = 1;
1384-
1385-
while exp > 1 {
1386-
if (exp & 1) == 1 {
1387-
acc = try_opt!(acc.checked_mul(base));
1380+
// SAFETY: This path has the same behavior as the other.
1381+
if unsafe { intrinsics::is_val_statically_known(self) }
1382+
&& self.unsigned_abs().is_power_of_two()
1383+
{
1384+
if self == 1 { // Avoid divide by zero
1385+
return Some(1);
13881386
}
1389-
exp /= 2;
1390-
base = try_opt!(base.checked_mul(base));
1387+
if self == -1 { // Avoid divide by zero
1388+
return Some(if exp & 1 != 0 { -1 } else { 1 });
1389+
}
1390+
// SAFETY: We just checked this is a power of two. and above zero.
1391+
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
1392+
if exp > Self::BITS / power_used { return None; } // Division of constants is free
1393+
1394+
// SAFETY: exp <= Self::BITS / power_used
1395+
let res = unsafe { intrinsics::unchecked_shl(
1396+
1 as Self,
1397+
intrinsics::unchecked_mul(power_used, exp) as Self
1398+
)};
1399+
// LLVM doesn't always optimize out the checks
1400+
// at the ir level.
1401+
1402+
let sign = self.is_negative() && exp & 1 != 0;
1403+
if !sign && res == Self::MIN {
1404+
None
1405+
} else if sign {
1406+
Some(res.wrapping_neg())
1407+
} else {
1408+
Some(res)
1409+
}
1410+
} else {
1411+
if exp == 0 {
1412+
return Some(1);
1413+
}
1414+
let mut base = self;
1415+
let mut acc: Self = 1;
1416+
1417+
while exp > 1 {
1418+
if (exp & 1) == 1 {
1419+
acc = try_opt!(acc.checked_mul(base));
1420+
}
1421+
exp /= 2;
1422+
base = try_opt!(base.checked_mul(base));
1423+
}
1424+
// since exp!=0, finally the exp must be 1.
1425+
// Deal with the final bit of the exponent separately, since
1426+
// squaring the base afterwards is not necessary and may cause a
1427+
// needless overflow.
1428+
acc.checked_mul(base)
13911429
}
1392-
// since exp!=0, finally the exp must be 1.
1393-
// Deal with the final bit of the exponent separately, since
1394-
// squaring the base afterwards is not necessary and may cause a
1395-
// needless overflow.
1396-
acc.checked_mul(base)
13971430
}
13981431

13991432
/// Strict exponentiation. Computes `self.pow(exp)`, panicking if
@@ -2058,27 +2091,58 @@ macro_rules! int_impl {
20582091
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
20592092
#[must_use = "this returns the result of the operation, \
20602093
without modifying the original"]
2094+
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
20612095
#[inline]
20622096
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
2063-
if exp == 0 {
2064-
return 1;
2065-
}
2066-
let mut base = self;
2067-
let mut acc: Self = 1;
2068-
2069-
while exp > 1 {
2070-
if (exp & 1) == 1 {
2071-
acc = acc.wrapping_mul(base);
2097+
// SAFETY: This path has the same behavior as the other.
2098+
if unsafe { intrinsics::is_val_statically_known(self) }
2099+
&& self.unsigned_abs().is_power_of_two()
2100+
{
2101+
if self == 1 { // Avoid divide by zero
2102+
return 1;
2103+
}
2104+
if self == -1 { // Avoid divide by zero
2105+
return if exp & 1 != 0 { -1 } else { 1 };
2106+
}
2107+
// SAFETY: We just checked this is a power of two. and above zero.
2108+
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
2109+
if exp > Self::BITS / power_used { return 0; } // Division of constants is free
2110+
2111+
// SAFETY: exp <= Self::BITS / power_used
2112+
let res = unsafe { intrinsics::unchecked_shl(
2113+
1 as Self,
2114+
intrinsics::unchecked_mul(power_used, exp) as Self
2115+
)};
2116+
// LLVM doesn't always optimize out the checks
2117+
// at the ir level.
2118+
2119+
let sign = self.is_negative() && exp & 1 != 0;
2120+
if sign {
2121+
res.wrapping_neg()
2122+
} else {
2123+
res
2124+
}
2125+
} else {
2126+
if exp == 0 {
2127+
return 1;
2128+
}
2129+
let mut base = self;
2130+
let mut acc: Self = 1;
2131+
2132+
while exp > 1 {
2133+
if (exp & 1) == 1 {
2134+
acc = acc.wrapping_mul(base);
2135+
}
2136+
exp /= 2;
2137+
base = base.wrapping_mul(base);
20722138
}
2073-
exp /= 2;
2074-
base = base.wrapping_mul(base);
2075-
}
20762139

2077-
// since exp!=0, finally the exp must be 1.
2078-
// Deal with the final bit of the exponent separately, since
2079-
// squaring the base afterwards is not necessary and may cause a
2080-
// needless overflow.
2081-
acc.wrapping_mul(base)
2140+
// since exp!=0, finally the exp must be 1.
2141+
// Deal with the final bit of the exponent separately, since
2142+
// squaring the base afterwards is not necessary and may cause a
2143+
// needless overflow.
2144+
acc.wrapping_mul(base)
2145+
}
20822146
}
20832147

20842148
/// Calculates `self` + `rhs`
@@ -2561,36 +2625,68 @@ macro_rules! int_impl {
25612625
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
25622626
#[must_use = "this returns the result of the operation, \
25632627
without modifying the original"]
2628+
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
25642629
#[inline]
25652630
pub const fn overflowing_pow(self, mut exp: u32) -> (Self, bool) {
2566-
if exp == 0 {
2567-
return (1,false);
2568-
}
2569-
let mut base = self;
2570-
let mut acc: Self = 1;
2571-
let mut overflown = false;
2572-
// Scratch space for storing results of overflowing_mul.
2573-
let mut r;
2574-
2575-
while exp > 1 {
2576-
if (exp & 1) == 1 {
2577-
r = acc.overflowing_mul(base);
2578-
acc = r.0;
2631+
// SAFETY: This path has the same behavior as the other.
2632+
if unsafe { intrinsics::is_val_statically_known(self) }
2633+
&& self.unsigned_abs().is_power_of_two()
2634+
{
2635+
if self == 1 { // Avoid divide by zero
2636+
return (1, false);
2637+
}
2638+
if self == -1 { // Avoid divide by zero
2639+
return (if exp & 1 != 0 { -1 } else { 1 }, false);
2640+
}
2641+
// SAFETY: We just checked this is a power of two. and above zero.
2642+
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
2643+
if exp > Self::BITS / power_used { return (0, true); } // Division of constants is free
2644+
2645+
// SAFETY: exp <= Self::BITS / power_used
2646+
let res = unsafe { intrinsics::unchecked_shl(
2647+
1 as Self,
2648+
intrinsics::unchecked_mul(power_used, exp) as Self
2649+
)};
2650+
// LLVM doesn't always optimize out the checks
2651+
// at the ir level.
2652+
2653+
let sign = self.is_negative() && exp & 1 != 0;
2654+
let overflow = res == Self::MIN;
2655+
if sign {
2656+
(res.wrapping_neg(), overflow)
2657+
} else {
2658+
(res, overflow)
2659+
}
2660+
} else {
2661+
if exp == 0 {
2662+
return (1,false);
2663+
}
2664+
let mut base = self;
2665+
let mut acc: Self = 1;
2666+
let mut overflown = false;
2667+
// Scratch space for storing results of overflowing_mul.
2668+
let mut r;
2669+
2670+
while exp > 1 {
2671+
if (exp & 1) == 1 {
2672+
r = acc.overflowing_mul(base);
2673+
acc = r.0;
2674+
overflown |= r.1;
2675+
}
2676+
exp /= 2;
2677+
r = base.overflowing_mul(base);
2678+
base = r.0;
25792679
overflown |= r.1;
25802680
}
2581-
exp /= 2;
2582-
r = base.overflowing_mul(base);
2583-
base = r.0;
2584-
overflown |= r.1;
2585-
}
25862681

2587-
// since exp!=0, finally the exp must be 1.
2588-
// Deal with the final bit of the exponent separately, since
2589-
// squaring the base afterwards is not necessary and may cause a
2590-
// needless overflow.
2591-
r = acc.overflowing_mul(base);
2592-
r.1 |= overflown;
2593-
r
2682+
// since exp!=0, finally the exp must be 1.
2683+
// Deal with the final bit of the exponent separately, since
2684+
// squaring the base afterwards is not necessary and may cause a
2685+
// needless overflow.
2686+
r = acc.overflowing_mul(base);
2687+
r.1 |= overflown;
2688+
r
2689+
}
25942690
}
25952691

25962692
/// Raises self to the power of `exp`, using exponentiation by squaring.
@@ -2608,28 +2704,68 @@ macro_rules! int_impl {
26082704
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
26092705
#[must_use = "this returns the result of the operation, \
26102706
without modifying the original"]
2707+
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
26112708
#[inline]
26122709
#[rustc_inherit_overflow_checks]
2710+
#[track_caller] // Hides the hackish overflow check for powers of two.
26132711
pub const fn pow(self, mut exp: u32) -> Self {
2614-
if exp == 0 {
2615-
return 1;
2616-
}
2617-
let mut base = self;
2618-
let mut acc = 1;
2712+
// SAFETY: This path has the same behavior as the other.
2713+
if unsafe { intrinsics::is_val_statically_known(self) }
2714+
&& self.unsigned_abs().is_power_of_two()
2715+
{
2716+
if self == 1 { // Avoid divide by zero
2717+
return 1;
2718+
}
2719+
if self == -1 { // Avoid divide by zero
2720+
return if exp & 1 != 0 { -1 } else { 1 };
2721+
}
2722+
// SAFETY: We just checked this is a power of two. and above zero.
2723+
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
2724+
if exp > Self::BITS / power_used { // Division of constants is free
2725+
#[allow(arithmetic_overflow)]
2726+
return Self::MAX * Self::MAX * 0;
2727+
}
26192728

2620-
while exp > 1 {
2621-
if (exp & 1) == 1 {
2622-
acc = acc * base;
2729+
// SAFETY: exp <= Self::BITS / power_used
2730+
let res = unsafe { intrinsics::unchecked_shl(
2731+
1 as Self,
2732+
intrinsics::unchecked_mul(power_used, exp) as Self
2733+
)};
2734+
// LLVM doesn't always optimize out the checks
2735+
// at the ir level.
2736+
2737+
let sign = self.is_negative() && exp & 1 != 0;
2738+
#[allow(arithmetic_overflow)]
2739+
if !sign && res == Self::MIN {
2740+
// So it panics.
2741+
_ = Self::MAX * Self::MAX;
2742+
}
2743+
if sign {
2744+
res.wrapping_neg()
2745+
} else {
2746+
res
2747+
}
2748+
} else {
2749+
if exp == 0 {
2750+
return 1;
2751+
}
2752+
let mut base = self;
2753+
let mut acc = 1;
2754+
2755+
while exp > 1 {
2756+
if (exp & 1) == 1 {
2757+
acc = acc * base;
2758+
}
2759+
exp /= 2;
2760+
base = base * base;
26232761
}
2624-
exp /= 2;
2625-
base = base * base;
2626-
}
26272762

2628-
// since exp!=0, finally the exp must be 1.
2629-
// Deal with the final bit of the exponent separately, since
2630-
// squaring the base afterwards is not necessary and may cause a
2631-
// needless overflow.
2632-
acc * base
2763+
// since exp!=0, finally the exp must be 1.
2764+
// Deal with the final bit of the exponent separately, since
2765+
// squaring the base afterwards is not necessary and may cause a
2766+
// needless overflow.
2767+
acc * base
2768+
}
26332769
}
26342770

26352771
/// Returns the square root of the number, rounded down.

‎library/core/src/num/uint_macros.rs

Lines changed: 174 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1364,28 +1364,49 @@ macro_rules! uint_impl {
13641364
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
13651365
#[must_use = "this returns the result of the operation, \
13661366
without modifying the original"]
1367+
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
13671368
#[inline]
13681369
pub const fn checked_pow(self, mut exp: u32) -> Option<Self> {
1369-
if exp == 0 {
1370-
return Some(1);
1371-
}
1372-
let mut base = self;
1373-
let mut acc: Self = 1;
1374-
1375-
while exp > 1 {
1376-
if (exp & 1) == 1 {
1377-
acc = try_opt!(acc.checked_mul(base));
1370+
// SAFETY: This path has the same behavior as the other.
1371+
if unsafe { intrinsics::is_val_statically_known(self) }
1372+
&& self.is_power_of_two()
1373+
{
1374+
if self == 1 { // Avoid divide by zero
1375+
return Some(1);
1376+
}
1377+
// SAFETY: We just checked this is a power of two. and above zero.
1378+
let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 };
1379+
if exp > Self::BITS / power_used { return None; } // Division of constants is free
1380+
1381+
// SAFETY: exp <= Self::BITS / power_used
1382+
unsafe { Some(intrinsics::unchecked_shl(
1383+
1 as Self,
1384+
intrinsics::unchecked_mul(power_used, exp) as Self
1385+
)) }
1386+
// LLVM doesn't always optimize out the checks
1387+
// at the ir level.
1388+
} else {
1389+
if exp == 0 {
1390+
return Some(1);
1391+
}
1392+
let mut base = self;
1393+
let mut acc: Self = 1;
1394+
1395+
while exp > 1 {
1396+
if (exp & 1) == 1 {
1397+
acc = try_opt!(acc.checked_mul(base));
1398+
}
1399+
exp /= 2;
1400+
base = try_opt!(base.checked_mul(base));
13781401
}
1379-
exp /= 2;
1380-
base = try_opt!(base.checked_mul(base));
1381-
}
13821402

1383-
// since exp!=0, finally the exp must be 1.
1384-
// Deal with the final bit of the exponent separately, since
1385-
// squaring the base afterwards is not necessary and may cause a
1386-
// needless overflow.
1403+
// since exp!=0, finally the exp must be 1.
1404+
// Deal with the final bit of the exponent separately, since
1405+
// squaring the base afterwards is not necessary and may cause a
1406+
// needless overflow.
13871407

1388-
acc.checked_mul(base)
1408+
acc.checked_mul(base)
1409+
}
13891410
}
13901411

13911412
/// Strict exponentiation. Computes `self.pow(exp)`, panicking if
@@ -1887,27 +1908,48 @@ macro_rules! uint_impl {
18871908
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
18881909
#[must_use = "this returns the result of the operation, \
18891910
without modifying the original"]
1911+
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
18901912
#[inline]
18911913
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
1892-
if exp == 0 {
1893-
return 1;
1894-
}
1895-
let mut base = self;
1896-
let mut acc: Self = 1;
1897-
1898-
while exp > 1 {
1899-
if (exp & 1) == 1 {
1900-
acc = acc.wrapping_mul(base);
1914+
// SAFETY: This path has the same behavior as the other.
1915+
if unsafe { intrinsics::is_val_statically_known(self) }
1916+
&& self.is_power_of_two()
1917+
{
1918+
if self == 1 { // Avoid divide by zero
1919+
return 1;
1920+
}
1921+
// SAFETY: We just checked this is a power of two. and above zero.
1922+
let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 };
1923+
if exp > Self::BITS / power_used { return 0; } // Division of constants is free
1924+
1925+
// SAFETY: exp <= Self::BITS / power_used
1926+
unsafe { intrinsics::unchecked_shl(
1927+
1 as Self,
1928+
intrinsics::unchecked_mul(power_used, exp) as Self
1929+
)}
1930+
// LLVM doesn't always optimize out the checks
1931+
// at the ir level.
1932+
} else {
1933+
if exp == 0 {
1934+
return 1;
1935+
}
1936+
let mut base = self;
1937+
let mut acc: Self = 1;
1938+
1939+
while exp > 1 {
1940+
if (exp & 1) == 1 {
1941+
acc = acc.wrapping_mul(base);
1942+
}
1943+
exp /= 2;
1944+
base = base.wrapping_mul(base);
19011945
}
1902-
exp /= 2;
1903-
base = base.wrapping_mul(base);
1904-
}
19051946

1906-
// since exp!=0, finally the exp must be 1.
1907-
// Deal with the final bit of the exponent separately, since
1908-
// squaring the base afterwards is not necessary and may cause a
1909-
// needless overflow.
1910-
acc.wrapping_mul(base)
1947+
// since exp!=0, finally the exp must be 1.
1948+
// Deal with the final bit of the exponent separately, since
1949+
// squaring the base afterwards is not necessary and may cause a
1950+
// needless overflow.
1951+
acc.wrapping_mul(base)
1952+
}
19111953
}
19121954

19131955
/// Calculates `self` + `rhs`
@@ -2341,37 +2383,58 @@ macro_rules! uint_impl {
23412383
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
23422384
#[must_use = "this returns the result of the operation, \
23432385
without modifying the original"]
2386+
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
23442387
#[inline]
23452388
pub const fn overflowing_pow(self, mut exp: u32) -> (Self, bool) {
2346-
if exp == 0{
2347-
return (1,false);
2348-
}
2349-
let mut base = self;
2350-
let mut acc: Self = 1;
2351-
let mut overflown = false;
2352-
// Scratch space for storing results of overflowing_mul.
2353-
let mut r;
2354-
2355-
while exp > 1 {
2356-
if (exp & 1) == 1 {
2357-
r = acc.overflowing_mul(base);
2358-
acc = r.0;
2389+
// SAFETY: This path has the same behavior as the other.
2390+
if unsafe { intrinsics::is_val_statically_known(self) }
2391+
&& self.is_power_of_two()
2392+
{
2393+
if self == 1 { // Avoid divide by zero
2394+
return (1, false);
2395+
}
2396+
// SAFETY: We just checked this is a power of two. and above zero.
2397+
let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 };
2398+
if exp > Self::BITS / power_used { return (0, true); } // Division of constants is free
2399+
2400+
// SAFETY: exp <= Self::BITS / power_used
2401+
unsafe { (intrinsics::unchecked_shl(
2402+
1 as Self,
2403+
intrinsics::unchecked_mul(power_used, exp) as Self
2404+
), false) }
2405+
// LLVM doesn't always optimize out the checks
2406+
// at the ir level.
2407+
} else {
2408+
if exp == 0{
2409+
return (1,false);
2410+
}
2411+
let mut base = self;
2412+
let mut acc: Self = 1;
2413+
let mut overflown = false;
2414+
// Scratch space for storing results of overflowing_mul.
2415+
let mut r;
2416+
2417+
while exp > 1 {
2418+
if (exp & 1) == 1 {
2419+
r = acc.overflowing_mul(base);
2420+
acc = r.0;
2421+
overflown |= r.1;
2422+
}
2423+
exp /= 2;
2424+
r = base.overflowing_mul(base);
2425+
base = r.0;
23592426
overflown |= r.1;
23602427
}
2361-
exp /= 2;
2362-
r = base.overflowing_mul(base);
2363-
base = r.0;
2364-
overflown |= r.1;
2365-
}
23662428

2367-
// since exp!=0, finally the exp must be 1.
2368-
// Deal with the final bit of the exponent separately, since
2369-
// squaring the base afterwards is not necessary and may cause a
2370-
// needless overflow.
2371-
r = acc.overflowing_mul(base);
2372-
r.1 |= overflown;
2429+
// since exp!=0, finally the exp must be 1.
2430+
// Deal with the final bit of the exponent separately, since
2431+
// squaring the base afterwards is not necessary and may cause a
2432+
// needless overflow.
2433+
r = acc.overflowing_mul(base);
2434+
r.1 |= overflown;
23732435

2374-
r
2436+
r
2437+
}
23752438
}
23762439

23772440
/// Raises self to the power of `exp`, using exponentiation by squaring.
@@ -2387,28 +2450,64 @@ macro_rules! uint_impl {
23872450
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
23882451
#[must_use = "this returns the result of the operation, \
23892452
without modifying the original"]
2453+
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
23902454
#[inline]
23912455
#[rustc_inherit_overflow_checks]
2456+
#[track_caller] // Hides the hackish overflow check for powers of two.
23922457
pub const fn pow(self, mut exp: u32) -> Self {
2393-
if exp == 0 {
2394-
return 1;
2395-
}
2396-
let mut base = self;
2397-
let mut acc = 1;
2458+
// LLVM now knows that `self` is a constant value, but not a
2459+
// constant in Rust. This allows us to compute the power used at
2460+
// compile-time.
2461+
//
2462+
// This will likely add a branch in debug builds, but this should
2463+
// be ok.
2464+
//
2465+
// This is a massive performance boost in release builds as you can
2466+
// get the power of a power of two and the exponent through a `shl`
2467+
// instruction, but we must add a couple more checks for parity with
2468+
// our own `pow`.
2469+
// SAFETY: This path has the same behavior as the other.
2470+
if unsafe { intrinsics::is_val_statically_known(self) }
2471+
&& self.is_power_of_two()
2472+
{
2473+
if self == 1 { // Avoid divide by zero
2474+
return 1;
2475+
}
2476+
// SAFETY: We just checked this is a power of two. and above zero.
2477+
let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 };
2478+
if exp > Self::BITS / power_used { // Division of constants is free
2479+
#[allow(arithmetic_overflow)]
2480+
return Self::MAX * Self::MAX * 0;
2481+
}
23982482

2399-
while exp > 1 {
2400-
if (exp & 1) == 1 {
2401-
acc = acc * base;
2483+
// SAFETY: exp <= Self::BITS / power_used
2484+
unsafe { intrinsics::unchecked_shl(
2485+
1 as Self,
2486+
intrinsics::unchecked_mul(power_used, exp) as Self
2487+
)}
2488+
// LLVM doesn't always optimize out the checks
2489+
// at the ir level.
2490+
} else {
2491+
if exp == 0 {
2492+
return 1;
2493+
}
2494+
let mut base = self;
2495+
let mut acc = 1;
2496+
2497+
while exp > 1 {
2498+
if (exp & 1) == 1 {
2499+
acc = acc * base;
2500+
}
2501+
exp /= 2;
2502+
base = base * base;
24022503
}
2403-
exp /= 2;
2404-
base = base * base;
2405-
}
24062504

2407-
// since exp!=0, finally the exp must be 1.
2408-
// Deal with the final bit of the exponent separately, since
2409-
// squaring the base afterwards is not necessary and may cause a
2410-
// needless overflow.
2411-
acc * base
2505+
// since exp!=0, finally the exp must be 1.
2506+
// Deal with the final bit of the exponent separately, since
2507+
// squaring the base afterwards is not necessary and may cause a
2508+
// needless overflow.
2509+
acc * base
2510+
}
24122511
}
24132512

24142513
/// Returns the square root of the number, rounded down.

‎src/tools/miri/src/shims/intrinsics/mod.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::iter;
55

66
use log::trace;
77

8+
use rand::Rng;
89
use rustc_apfloat::{Float, Round};
910
use rustc_middle::ty::layout::LayoutOf;
1011
use rustc_middle::{
@@ -141,6 +142,17 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
141142
this.write_pointer(Pointer::new(ptr.provenance, masked_addr), dest)?;
142143
}
143144

145+
// We want to return either `true` or `false` at random, or else something like
146+
// ```
147+
// if !is_val_statically_known(0) { unreachable_unchecked(); }
148+
// ```
149+
// would not be considered UB, or the other way around (`is_val_statically_known(0)`).
150+
"is_val_statically_known" => {
151+
let [_] = check_arg_count(args)?;
152+
let branch: bool = this.machine.rng.get_mut().gen();
153+
this.write_scalar(Scalar::from_bool(branch), dest)?;
154+
}
155+
144156
// Floating-point operations
145157
"fabsf32" => {
146158
let [f] = check_arg_count(args)?;

‎src/tools/miri/tests/pass/intrinsics.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,21 @@ fn main() {
3333
assert_eq!(intrinsics::likely(false), false);
3434
assert_eq!(intrinsics::unlikely(true), true);
3535

36+
let mut saw_true = false;
37+
let mut saw_false = false;
38+
39+
for _ in 0..50 {
40+
if unsafe { intrinsics::is_val_statically_known(0) } {
41+
saw_true = true;
42+
} else {
43+
saw_false = true;
44+
}
45+
}
46+
assert!(
47+
saw_true && saw_false,
48+
"`is_val_statically_known` failed to return both true and false. Congrats, you won the lottery!"
49+
);
50+
3651
intrinsics::forget(Bomb);
3752

3853
let _v = intrinsics::discriminant_value(&Some(()));
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// compile-flags: --crate-type=lib -Zmerge-functions=disabled -O
2+
3+
#![feature(core_intrinsics)]
4+
5+
use std::intrinsics::is_val_statically_known;
6+
7+
// Tuple struct wrapping a single `u32`.
// NOTE(review): not referenced by any function in this test file.
pub struct A(u32);
8+
// Single-variant enum carrying a `u32`.
// NOTE(review): not referenced by any function in this test file.
pub enum B {
9+
Ye(u32),
10+
}
11+
12+
// Helper: returns 1 if `is_val_statically_known(a)` reports true, 0 otherwise.
// Marked `#[inline]` so that callers passing a literal can expose the constant
// to the intrinsic after inlining.
#[inline]
13+
pub fn _u32(a: u32) -> i32 {
14+
if unsafe { is_val_statically_known(a) } { 1 } else { 0 }
15+
}
16+
17+
// CHECK-LABEL: @_u32_true(
18+
// Calls `_u32` with a literal argument; the FileCheck directive in the body
// expects the whole function to fold to returning 1 (value known).
#[no_mangle]
19+
pub fn _u32_true() -> i32 {
20+
// CHECK: ret i32 1
21+
_u32(1)
22+
}
23+
24+
// CHECK-LABEL: @_u32_false(
25+
// Calls `_u32` with a runtime parameter; the FileCheck directive in the body
// expects the function to return 0 (value not known).
#[no_mangle]
26+
pub fn _u32_false(a: u32) -> i32 {
27+
// CHECK: ret i32 0
28+
_u32(a)
29+
}
30+
31+
// Helper: returns 3 if `is_val_statically_known(b)` reports true, 2 otherwise.
// Uses different return values than `_u32` so the two families of expected
// results cannot be confused.
#[inline]
32+
pub fn _bool(b: bool) -> i32 {
33+
if unsafe { is_val_statically_known(b) } { 3 } else { 2 }
34+
}
35+
36+
// CHECK-LABEL: @_bool_true(
37+
// Calls `_bool` with the literal `true`; the FileCheck directive in the body
// expects the function to fold to returning 3 (value known).
#[no_mangle]
37+
pub fn _bool_true() -> i32 {
38+
// CHECK: ret i32 3
39+
_bool(true)
40+
}
42+
43+
// CHECK-LABEL: @_bool_false(
44+
// Calls `_bool` with a runtime parameter; the FileCheck directive in the body
// expects the function to return 2 (value not known).
#[no_mangle]
44+
pub fn _bool_false(b: bool) -> i32 {
45+
// CHECK: ret i32 2
46+
_bool(b)
47+
}

‎tests/codegen/pow_of_two.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// compile-flags: --crate-type=lib -Zmerge-functions=disabled -O -C overflow-checks=false
2+
3+
// CHECK-LABEL: @a(
4+
// `2u64.pow(exp)` with a runtime exponent: the expected IR is a compare
// against 64, a zero-extension of `exp`, and a single `shl`.
// NOTE(review): the expected `icmp ugt ... 64` bound lets `exp == 64` reach
// the `shl`, where the shift amount would equal the 64-bit width; confirm this
// expectation is intended and revisit it if the bound in `pow` changes.
#[no_mangle]
5+
pub fn a(exp: u32) -> u64 {
6+
// CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 64
7+
// CHECK: %{{[^ ]+}} = zext i32 %exp to i64
8+
// CHECK: %{{[^ ]+}} = shl nuw i64 {{[^ ]+}}, %{{[^ ]+}}
9+
// CHECK: ret i64 %{{[^ ]+}}
10+
2u64.pow(exp)
11+
}
12+
13+
// CHECK-LABEL: @b(
14+
// Signed counterpart of `a`: `2i64.pow(exp)` with a runtime exponent, with the
// same expected compare / zero-extend / `shl` sequence.
// NOTE(review): the expected `icmp ugt ... 64` bound admits `exp == 63` and
// `exp == 64`, neither of which is representable in `i64`; confirm this
// expectation against the signed `pow` implementation.
#[no_mangle]
14+
pub fn b(exp: u32) -> i64 {
15+
// CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 64
16+
// CHECK: %{{[^ ]+}} = zext i32 %exp to i64
17+
// CHECK: %{{[^ ]+}} = shl nuw i64 {{[^ ]+}}, %{{[^ ]+}}
18+
// CHECK: ret i64 %{{[^ ]+}}
19+
2i64.pow(exp)
20+
}
22+
23+
// CHECK-LABEL: @c(
24+
// `4u32.pow(exp)`: base is 2^2, so the expected IR doubles `exp` (shift left
// by 1), shifts 1 by that amount, and selects 0 when the compare fires.
// NOTE(review): the expected `icmp ugt ... 16` bound lets `exp == 16` through,
// giving a shift amount of 32 on a 32-bit value; confirm this expectation is
// intended and revisit it if the bound in `pow` changes.
#[no_mangle]
24+
pub fn c(exp: u32) -> u32 {
25+
// CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 16
26+
// CHECK: %{{[^ ]+}} = shl nuw nsw i32 %exp, 1
27+
// CHECK: %{{[^ ]+}} = shl nuw i32 1, %{{[^ ]+}}
28+
// CHECK: %{{[^ ]+}} = select i1 %{{[^ ]+}}, i32 0, i32 %{{[^ ]+}}
29+
// CHECK: ret i32 %{{[^ ]+}}
30+
4u32.pow(exp)
31+
}
33+
34+
// CHECK-LABEL: @d(
35+
// `32u32.pow(exp)`: base is 2^5, so the expected IR multiplies `exp` by 5,
// shifts 1 by the product, and selects 0 when `exp` exceeds 6
// (5 * 6 = 30 is the largest in-range shift for 32 bits).
#[no_mangle]
35+
pub fn d(exp: u32) -> u32 {
36+
// CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 6
37+
// CHECK: %{{[^ ]+}} = mul nuw nsw i32 %exp, 5
38+
// CHECK: %{{[^ ]+}} = shl nuw nsw i32 1, %{{[^ ]+}}
39+
// CHECK: %{{[^ ]+}} = select i1 {{[^ ]+}}, i32 0, i32 %{{[^ ]+}}
40+
// CHECK: ret i32 %{{[^ ]+}}
41+
32u32.pow(exp)
42+
}
44+
45+
// CHECK-LABEL: @e(
46+
// Signed counterpart of `d`: `32i32.pow(exp)`. The expectations match `d`
// except that the `nsw` flag on the `mul` and `shl` is optional.
#[no_mangle]
46+
pub fn e(exp: u32) -> i32 {
47+
// CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 6
48+
// CHECK: %{{[^ ]+}} = mul nuw {{(nsw )?}}i32 %exp, 5
49+
// CHECK: %{{[^ ]+}} = shl nuw {{(nsw )?}}i32 1, %{{[^ ]+}}
50+
// CHECK: %{{[^ ]+}} = select i1 {{[^ ]+}}, i32 0, i32 %{{[^ ]+}}
51+
// CHECK: ret i32 %{{[^ ]+}}
52+
32i32.pow(exp)
53+
}
55+
// note: d and e are expected to yield the same IR
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// run-pass
2+
3+
#![feature(core_intrinsics)]
4+
#![feature(is_val_statically_known)]
5+
6+
use std::intrinsics::is_val_statically_known;
7+
8+
// Evaluates the intrinsic during const evaluation so `main` can inspect the
// answer it gives in that context.
const CONST_TEST: bool = unsafe { is_val_statically_known(0) };
9+
10+
// Run-pass check: panics if the const-evaluated intrinsic returned `true`.
// The inline comment notes that `false` is the current behavior, not a
// guarantee.
fn main() {
11+
if CONST_TEST {
12+
unreachable!("currently expected to return false during const eval");
13+
// but note that this is not a guarantee!
14+
}
15+
}

0 commit comments

Comments
 (0)
Please sign in to comment.