diff --git a/crates/cuda_builder/src/lib.rs b/crates/cuda_builder/src/lib.rs index f187ede2..6c94293e 100644 --- a/crates/cuda_builder/src/lib.rs +++ b/crates/cuda_builder/src/lib.rs @@ -715,6 +715,12 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result { rustflags.push(format!("--emit={string}")); } + if builder.debug == DebugInfo::None { + // Default dev builds: strip debuginfo to avoid libnvvm crashes with unoptimized IR. + // TODO: drop this once newer libnvvm toolchains are stable with debuginfo in opt=0 builds. + rustflags.push("-Cdebuginfo=0".into()); + } + let mut llvm_args = vec![NvvmOption::Arch(builder.arch).to_string()]; if !builder.nvvm_opts { diff --git a/crates/cuda_std/src/warp.rs b/crates/cuda_std/src/warp.rs index 146e2abb..0a1d242f 100644 --- a/crates/cuda_std/src/warp.rs +++ b/crates/cuda_std/src/warp.rs @@ -296,9 +296,9 @@ unsafe fn match_any_32(mask: u32, value: u32) -> u32 { unsafe fn match_any_64(mask: u32, value: u64) -> u32 { extern "C" { #[link_name = "llvm.nvvm.match.any.sync.i64"] - fn __nvvm_warp_match_any_64(mask: u32, value: u64) -> u32; + fn __nvvm_warp_match_any_64(mask: u32, value: u64) -> u64; } - __nvvm_warp_match_any_64(mask, value) + __nvvm_warp_match_any_64(mask, value) as u32 } #[gpu_only] diff --git a/crates/rustc_codegen_nvvm/src/override_fns.rs b/crates/rustc_codegen_nvvm/src/override_fns.rs index e4c0ee23..060e6e29 100644 --- a/crates/rustc_codegen_nvvm/src/override_fns.rs +++ b/crates/rustc_codegen_nvvm/src/override_fns.rs @@ -8,6 +8,7 @@ use crate::context::CodegenCx; use crate::llvm; use rustc_codegen_ssa::mono_item::MonoItemExt; use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods}; +use rustc_hir::def::DefKind; use rustc_hir::def_id::LOCAL_CRATE; use rustc_middle::mir::mono::{Linkage, MonoItem, MonoItemData, Visibility}; use rustc_middle::ty::layout::FnAbiOf; @@ -43,6 +44,12 @@ fn should_override<'tcx>(func: Instance<'tcx>, cx: &CodegenCx<'_, 'tcx>) -> bool return false; } + // Only try to override top-level/assoc functions; closures/anon fns cause ICE via item_name. + match cx.tcx.def_kind(func.def_id()) { + DefKind::Fn | DefKind::AssocFn => {} + _ => return false, + } + let sym = cx.tcx.item_name(func.def_id()); let name = sym.as_str(); diff --git a/crates/rustc_codegen_nvvm/src/ty.rs b/crates/rustc_codegen_nvvm/src/ty.rs index b88a2dea..22801013 100644 --- a/crates/rustc_codegen_nvvm/src/ty.rs +++ b/crates/rustc_codegen_nvvm/src/ty.rs @@ -228,10 +228,16 @@ impl<'ll, 'tcx> BaseTypeCodegenMethods for CodegenCx<'ll, 'tcx> { fn float_width(&self, ty: &'ll Type) -> usize { match self.type_kind(ty) { + TypeKind::Half => 16, TypeKind::Float => 32, TypeKind::Double => 64, TypeKind::X86_FP80 => 80, TypeKind::FP128 | TypeKind::PPC_FP128 => 128, + TypeKind::BFloat => 16, + TypeKind::Vector | TypeKind::ScalableVector => { + // Recurse on element type for vector floats + self.float_width(self.element_type(ty)) + } _ => bug!("llvm_float_width called on a non-float type"), } }