Skip to content

Commit 87b10b4

Browse files
nnethercote authored and LegNeato committed
Remove ComputeCapability and CUDA_ARCH.
CUDA C++ has the `__CUDA_ARCH__` macro for conditional compilation. rust-cuda has a `CUDA_ARCH` environment variable that is similar, and the `from_cuda_arch_env` method parses the environment variable's value to produce a value of type `ComputeCapability`, which can be queried for conditional compilation.

But `ComputeCapability` has a big problem. It's missing all the capabilities after 80, including the 'a' and 'f' suffix ones. We could just add them, but it implements `PartialOrd`/`Ord` and uses ordering to determine feature availability. This was valid before the 'a' and 'f' suffixes were added but is no longer, because some pairs of values are incomparable. E.g. `100a` and `101a` -- each one has some features the other doesn't, so neither is clearly larger than the other, and they're also not equal.

So, what to do? Well, `CUDA_ARCH` was added in 2022. More recently, another mechanism for conditional compilation was added: `target_feature`, in #239. This does work with the 'a' and 'f' suffix targets, and it's more Rust-y.

So this commit just removes `CUDA_ARCH` and `ComputeCapability` (removing two more places where the default compilation target is specified) and changes the only uses (in `cuda_std/src/atomic/mid.rs`) to use `target_feature` instead.

We don't have any tests exercising conditional compilation, alas, but I did some manual checking locally to verify that it works the same.
1 parent 98d7195 commit 87b10b4

File tree

6 files changed

+7
-71
lines changed

6 files changed

+7
-71
lines changed

crates/cuda_builder/src/lib.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -809,9 +809,6 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
809809
}
810810
}
811811

812-
let arch = format!("{:?}0", builder.arch);
813-
cargo.env("CUDA_ARCH", arch.strip_prefix("Compute").unwrap());
814-
815812
let cargo_encoded_rustflags = join_checking_for_separators(rustflags, "\x1f");
816813

817814
let build = cargo

crates/cuda_std/src/atomic/mid.rs

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,14 @@
77
#![allow(dead_code, unused_imports)]
88

99
use super::intrinsics;
10-
use crate::cfg::ComputeCapability;
1110
use crate::gpu_only;
1211
use core::sync::atomic::Ordering::{self, *};
1312
use paste::paste;
1413

15-
fn ge_sm70() -> bool {
16-
ComputeCapability::from_cuda_arch_env() >= ComputeCapability::Compute70
17-
}
18-
1914
#[gpu_only]
2015
pub fn device_thread_fence(ordering: Ordering) {
2116
unsafe {
22-
if ge_sm70() {
17+
if cfg!(target_feature = "compute_70") {
2318
if ordering == SeqCst {
2419
return intrinsics::fence_sc_device();
2520
}
@@ -38,7 +33,7 @@ pub fn device_thread_fence(ordering: Ordering) {
3833
#[gpu_only]
3934
pub fn block_thread_fence(ordering: Ordering) {
4035
unsafe {
41-
if ge_sm70() {
36+
if cfg!(target_feature = "compute_70") {
4237
if ordering == SeqCst {
4338
return intrinsics::fence_sc_block();
4439
}
@@ -57,7 +52,7 @@ pub fn block_thread_fence(ordering: Ordering) {
5752
#[gpu_only]
5853
pub fn system_thread_fence(ordering: Ordering) {
5954
unsafe {
60-
if ge_sm70() {
55+
if cfg!(target_feature = "compute_70") {
6156
if ordering == SeqCst {
6257
return intrinsics::fence_sc_system();
6358
}
@@ -80,7 +75,7 @@ macro_rules! load {
8075
#[$crate::gpu_only]
8176
#[allow(clippy::missing_safety_doc)]
8277
pub unsafe fn [<atomic_load_ $width _ $scope>](ptr: *mut $type, ordering: Ordering) -> $type {
83-
if ge_sm70() {
78+
if cfg!(target_feature = "compute_70") {
8479
match ordering {
8580
SeqCst => {
8681
intrinsics::[<fence_sc_ $scope>]();
@@ -136,7 +131,7 @@ macro_rules! store {
136131
#[$crate::gpu_only]
137132
#[allow(clippy::missing_safety_doc)]
138133
pub unsafe fn [<atomic_store_ $width _ $scope>](ptr: *mut $type, ordering: Ordering, val: $type) {
139-
if ge_sm70() {
134+
if cfg!(target_feature = "compute_70") {
140135
match ordering {
141136
SeqCst => {
142137
intrinsics::[<fence_sc_ $scope>]();
@@ -185,7 +180,7 @@ macro_rules! inner_fetch_ops_1_param {
185180
#[$crate::gpu_only]
186181
#[allow(clippy::missing_safety_doc)]
187182
pub unsafe fn [<atomic_fetch_ $op _ $type _ $scope>](ptr: *mut $type, ordering: Ordering, val: $type) -> $type {
188-
if ge_sm70() {
183+
if cfg!(target_feature = "compute_70") {
189184
match ordering {
190185
SeqCst => {
191186
intrinsics::[<fence_sc_ $scope>]();
@@ -259,7 +254,7 @@ macro_rules! inner_cas {
259254
#[$crate::gpu_only]
260255
#[allow(clippy::missing_safety_doc)]
261256
pub unsafe fn [<atomic_compare_and_swap_ $type _ $scope>](ptr: *mut $type, current: $type, new: $type, ordering: Ordering) -> $type {
262-
if ge_sm70() {
257+
if cfg!(target_feature = "compute_70") {
263258
match ordering {
264259
SeqCst => {
265260
intrinsics::[<fence_sc_ $scope>]();

crates/cuda_std/src/cfg.rs

Lines changed: 0 additions & 48 deletions
This file was deleted.

crates/cuda_std/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ pub mod misc;
4343
// WIP
4444
// pub mod rt;
4545
pub mod atomic;
46-
pub mod cfg;
4746
pub mod ptr;
4847
pub mod shared;
4948
pub mod thread;

crates/rustc_codegen_nvvm/build.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,6 @@ static REQUIRED_MAJOR_LLVM_VERSION: u8 = 7;
1717

1818
fn main() {
1919
rustc_llvm_build();
20-
21-
// this is set by cuda_builder, but in case somebody is using the codegen
22-
// manually, default to 520 (which is what nvvm defaults to).
23-
if option_env!("CUDA_ARCH").is_none() {
24-
println!("cargo:rustc-env=CUDA_ARCH=520")
25-
}
2620
}
2721

2822
fn fail(s: &str) -> ! {

tests/compiletests/src/main.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,6 @@ fn build_deps(
241241
.arg("--target-dir")
242242
.arg(deps_target_dir)
243243
.env("CARGO_ENCODED_RUSTFLAGS", cargo_encoded_rustflags)
244-
.env("CUDA_ARCH", "70")
245244
.stderr(std::process::Stdio::inherit())
246245
.stdout(std::process::Stdio::inherit())
247246
.status()

0 commit comments

Comments
 (0)