diff --git a/crates/cuda_builder/src/lib.rs b/crates/cuda_builder/src/lib.rs
index 12f8c0f0..1a056d1b 100644
--- a/crates/cuda_builder/src/lib.rs
+++ b/crates/cuda_builder/src/lib.rs
@@ -130,6 +130,21 @@ pub struct CudaBuilder {
     ///
     /// `true` by default.
     pub override_libm: bool,
+    /// If `true`, the codegen will attempt to place `static` variables in CUDA's
+    /// constant memory, which is fast but limited in size (~64KB total across all
+    /// statics). The codegen avoids placing any single item too large, but it does not
+    /// track cumulative size. Exceeding the limit may cause `IllegalAddress` runtime
+    /// errors (CUDA error code: `700`).
+    ///
+    /// The default is `false`, which places all statics in global memory. This avoids
+    /// such errors but may reduce performance and use more global memory. When set to
+    /// `false`, you can still annotate `static` variables with
+    /// `#[cuda_std::address_space(constant)]` to place them in constant memory
+    /// manually. This option only affects automatic placement.
+    ///
+    /// Future versions may support smarter placement and user-controlled
+    /// packing/spilling strategies.
+    pub use_constant_memory_space: bool,
     /// Whether to generate any debug info and what level of info to generate.
     pub debug: DebugInfo,
     /// Additional arguments passed to cargo during `cargo build`.
@@ -155,6 +170,7 @@ impl CudaBuilder {
             emit: None,
             optix: false,
             override_libm: true,
+            use_constant_memory_space: false,
             debug: DebugInfo::None,
             build_args: vec![],
             final_module_path: None,
@@ -284,6 +300,24 @@ impl CudaBuilder {
         self
     }
 
+    /// If `true`, the codegen will attempt to place `static` variables in CUDA's
+    /// constant memory, which is fast but limited in size (~64KB total across all
+    /// statics). The codegen avoids placing any single item too large, but it does not
+    /// track cumulative size. Exceeding the limit may cause `IllegalAddress` runtime
+    /// errors (CUDA error code: `700`).
+    ///
+    /// If `false`, all statics are placed in global memory. This avoids such errors but
+    /// may reduce performance and use more global memory. You can still annotate
+    /// `static` variables with `#[cuda_std::address_space(constant)]` to place them in
+    /// constant memory manually, as this option only affects automatic placement.
+    ///
+    /// Future versions may support smarter placement and user-controlled
+    /// packing/spilling strategies.
+    pub fn use_constant_memory_space(mut self, use_constant_memory_space: bool) -> Self {
+        self.use_constant_memory_space = use_constant_memory_space;
+        self
+    }
+
     /// An optional path where to dump LLVM IR of the final output the codegen will feed to libnvvm. Usually
     /// used for debugging.
     pub fn final_module_path(mut self, path: impl AsRef) -> Self {
diff --git a/crates/rustc_codegen_nvvm/src/builder.rs b/crates/rustc_codegen_nvvm/src/builder.rs
index 7dac242b..ac2075f2 100644
--- a/crates/rustc_codegen_nvvm/src/builder.rs
+++ b/crates/rustc_codegen_nvvm/src/builder.rs
@@ -1154,18 +1154,7 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 
 impl<'ll> StaticBuilderMethods for Builder<'_, 'll, '_> {
     fn get_static(&mut self, def_id: DefId) -> &'ll Value {
-        unsafe {
-            let mut g = self.cx.get_static(def_id);
-            let llty = self.val_ty(g);
-            let addrspace = AddressSpace(llvm::LLVMGetPointerAddressSpace(llty));
-            if addrspace != AddressSpace::DATA {
-                trace!("Remapping global address space of global {:?}", g);
-                let llty = llvm::LLVMGetElementType(llty);
-                let ty = self.type_ptr_to_ext(llty, AddressSpace::DATA);
-                g = llvm::LLVMBuildAddrSpaceCast(self.llbuilder, g, ty, unnamed());
-            }
-            g
-        }
+        self.cx.get_static(def_id)
     }
 }
 
diff --git a/crates/rustc_codegen_nvvm/src/context.rs b/crates/rustc_codegen_nvvm/src/context.rs
index 077d8887..a185a101 100644
--- a/crates/rustc_codegen_nvvm/src/context.rs
+++ b/crates/rustc_codegen_nvvm/src/context.rs
@@ -22,7 +22,7 @@ use rustc_errors::DiagMessage;
 use rustc_hash::FxHashMap;
 use rustc_middle::dep_graph::DepContext;
 use rustc_middle::ty::layout::{
-    FnAbiError, FnAbiOf, FnAbiRequest, HasTyCtxt, HasTypingEnv, LayoutError,
+    FnAbiError, FnAbiOf, FnAbiRequest, HasTyCtxt, HasTypingEnv, LayoutError, LayoutOf,
 };
 use rustc_middle::ty::layout::{FnAbiOfHelpers, LayoutOfHelpers};
 use rustc_middle::ty::{Ty, TypeVisitableExt};
@@ -40,6 +40,10 @@ use rustc_target::callconv::FnAbi;
 use rustc_target::spec::{HasTargetSpec, Target};
 use tracing::{debug, trace};
 
+/// "There is a total of 64 KB constant memory on a device."
+/// Source: presumably NVIDIA's CUDA C++ Best Practices Guide, "Constant Memory" (TODO confirm).
+const CONSTANT_MEMORY_SIZE_LIMIT_BYTES: u64 = 64 * 1024;
+
 pub(crate) struct CodegenCx<'ll, 'tcx> {
     pub tcx: TyCtxt<'tcx>,
 
@@ -267,7 +271,31 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
         }
 
         if !is_mutable && self.type_is_freeze(ty) {
-            AddressSpace(4)
+            if !self.codegen_args.use_constant_memory_space {
+                // Automatic constant placement is off: use global memory (address space 1).
+                AddressSpace(1)
+            } else {
+                // Automatic constant placement is on; check whether this static fits.
+                //
+                // FIXME(@LegNeato) ideally we keep track of what we have put into
+                // constant memory and when it is filled up spill instead of only
+                // spilling when a static is big. We'll probably want some packing
+                // strategy controlled by the user...for example, if you have one large
+                // static and many small ones, you might want the small ones to all be
+                // in constant memory or just the big one depending on your workload.
+                let layout = self.layout_of(ty);
+                if layout.size.bytes() > CONSTANT_MEMORY_SIZE_LIMIT_BYTES {
+                    self.tcx.sess.dcx().warn(format!(
+                        "static `{}` exceeds the constant memory limit; placing in global memory (performance may be reduced)",
+                        instance
+                    ));
+                    // Too big for constant memory: fall back to global memory (address space 1).
+                    AddressSpace(1)
+                } else {
+                    // Fits under the limit: use constant memory (address space 4).
+                    AddressSpace(4)
+                }
+            }
         } else {
             AddressSpace::DATA
         }
@@ -519,6 +547,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
 pub struct CodegenArgs {
     pub nvvm_options: Vec,
     pub override_libm: bool,
+    pub use_constant_memory_space: bool,
     pub final_module_path: Option,
 }
 
@@ -537,6 +566,8 @@ impl CodegenArgs {
                 cg_args.nvvm_options.push(flag);
             } else if arg == "--override-libm" {
                 cg_args.override_libm = true;
+            } else if arg == "--use-constant-memory-space" {
+                cg_args.use_constant_memory_space = true;
             } else if arg == "--final-module-path" {
                 cg_args.final_module_path = Some(PathBuf::from(
                     args.get(idx + 1).expect("No path for --final-module-path"),