Skip to content

Commit 0df8306

Browse files
authored
[Clang][CUDA] Add support for SM_88, SM_110, and SM_110a architectures (#170258)
This patch adds Clang support for the new GPU architectures introduced in CUDA 13.0:
- SM_88: Ampere architecture variant
- SM_110/SM_110a: Blackwell architecture variants

Additionally, this patch deprecates SM_101/SM_101a for CUDA 13.0 and later versions by capping their maximum supported CUDA version at 12.9. The SM_101 architecture is superseded by SM_110 and is no longer supported by the CUDA 13.0+ toolchain components.
1 parent 93c7ad1 commit 0df8306

File tree

8 files changed

+34
-3
lines changed

8 files changed

+34
-3
lines changed

clang/include/clang/Basic/BuiltinsNVPTX.td

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,22 @@ class SM<string version, list<SMFeatures> newer_list> : SMFeatures {
2323

2424
let Features = "sm_121a" in def SM_121a : SMFeatures;
2525
let Features = "sm_120a" in def SM_120a : SMFeatures;
26+
let Features = "sm_110a" in def SM_110a : SMFeatures;
2627
let Features = "sm_103a" in def SM_103a : SMFeatures;
2728
let Features = "sm_101a" in def SM_101a : SMFeatures;
2829
let Features = "sm_100a" in def SM_100a : SMFeatures;
2930
let Features = "sm_90a" in def SM_90a : SMFeatures;
3031

3132
def SM_121 : SM<"121", [SM_121a]>;
3233
def SM_120 : SM<"120", [SM_120a, SM_121]>;
33-
def SM_103 : SM<"103", [SM_103a, SM_120]>;
34+
def SM_110 : SM<"110", [SM_110a, SM_120]>;
35+
def SM_103 : SM<"103", [SM_103a, SM_110]>;
3436
def SM_101 : SM<"101", [SM_101a, SM_103]>;
3537
def SM_100 : SM<"100", [SM_100a, SM_101]>;
3638
def SM_90 : SM<"90", [SM_90a, SM_100]>;
3739
def SM_89 : SM<"89", [SM_90]>;
38-
def SM_87 : SM<"87", [SM_89]>;
40+
def SM_88 : SM<"88", [SM_89]>;
41+
def SM_87 : SM<"87", [SM_88]>;
3942
def SM_86 : SM<"86", [SM_87]>;
4043
def SM_80 : SM<"80", [SM_86]>;
4144
def SM_75 : SM<"75", [SM_80]>;
@@ -54,8 +57,9 @@ class PTX<string version, PTXFeatures newer> : PTXFeatures {
5457
let Features = !strconcat("ptx", version, "|", newer.Features);
5558
}
5659

57-
let Features = "ptx88" in def PTX88 : PTXFeatures;
60+
let Features = "ptx90" in def PTX90 : PTXFeatures;
5861

62+
def PTX88 : PTX<"88", PTX90>;
5963
def PTX87 : PTX<"87", PTX88>;
6064
def PTX86 : PTX<"86", PTX87>;
6165
def PTX85 : PTX<"85", PTX86>;

clang/include/clang/Basic/Cuda.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ enum class CudaVersion {
4848
CUDA_126,
4949
CUDA_128,
5050
CUDA_129,
51+
CUDA_130,
5152
FULLY_SUPPORTED = CUDA_128,
5253
PARTIALLY_SUPPORTED =
5354
CUDA_129, // Partially supported. Proceed with a warning.

clang/include/clang/Basic/OffloadArch.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ enum class OffloadArch {
3838
SM_80,
3939
SM_86,
4040
SM_87,
41+
SM_88,
4142
SM_89,
4243
SM_90,
4344
SM_90a,
@@ -47,6 +48,8 @@ enum class OffloadArch {
4748
SM_101a,
4849
SM_103,
4950
SM_103a,
51+
SM_110,
52+
SM_110a,
5053
SM_120,
5154
SM_120a,
5255
SM_121,

clang/lib/Basic/Cuda.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
4545
CUDA_ENTRY(12, 6),
4646
CUDA_ENTRY(12, 8),
4747
CUDA_ENTRY(12, 9),
48+
CUDA_ENTRY(13, 0),
4849
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
4950
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
5051
};
@@ -125,6 +126,10 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
125126
case OffloadArch::SM_121:
126127
case OffloadArch::SM_121a:
127128
return CudaVersion::CUDA_129;
129+
case OffloadArch::SM_88:
130+
case OffloadArch::SM_110:
131+
case OffloadArch::SM_110a:
132+
return CudaVersion::CUDA_130;
128133
default:
129134
llvm_unreachable("invalid enum");
130135
}
@@ -147,6 +152,9 @@ CudaVersion MaxVersionForOffloadArch(OffloadArch A) {
147152
case OffloadArch::SM_35:
148153
case OffloadArch::SM_37:
149154
return CudaVersion::CUDA_118;
155+
case OffloadArch::SM_101:
156+
case OffloadArch::SM_101a:
157+
return CudaVersion::CUDA_129;
150158
default:
151159
return CudaVersion::NEW;
152160
}

clang/lib/Basic/OffloadArch.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ static const OffloadArchToStringMap ArchNames[] = {
2626
SM(75), // Turing
2727
SM(80), SM(86), // Ampere
2828
SM(87), // Jetson/Drive AGX Orin
29+
SM(88), // Ampere
2930
SM(89), // Ada Lovelace
3031
SM(90), // Hopper
3132
SM(90a), // Hopper
@@ -35,6 +36,8 @@ static const OffloadArchToStringMap ArchNames[] = {
3536
SM(101a), // Blackwell
3637
SM(103), // Blackwell
3738
SM(103a), // Blackwell
39+
SM(110), // Blackwell
40+
SM(110a), // Blackwell
3841
SM(120), // Blackwell
3942
SM(120a), // Blackwell
4043
SM(121), // Blackwell

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
287287
return "860";
288288
case OffloadArch::SM_87:
289289
return "870";
290+
case OffloadArch::SM_88:
291+
return "880";
290292
case OffloadArch::SM_89:
291293
return "890";
292294
case OffloadArch::SM_90:
@@ -301,6 +303,9 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
301303
case OffloadArch::SM_103:
302304
case OffloadArch::SM_103a:
303305
return "1030";
306+
case OffloadArch::SM_110:
307+
case OffloadArch::SM_110a:
308+
return "1100";
304309
case OffloadArch::SM_120:
305310
case OffloadArch::SM_120a:
306311
return "1200";
@@ -316,6 +321,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
316321
case OffloadArch::SM_100a:
317322
case OffloadArch::SM_101a:
318323
case OffloadArch::SM_103a:
324+
case OffloadArch::SM_110a:
319325
case OffloadArch::SM_120a:
320326
case OffloadArch::SM_121a:
321327
Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2309,6 +2309,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
23092309
case OffloadArch::SM_80:
23102310
case OffloadArch::SM_86:
23112311
case OffloadArch::SM_87:
2312+
case OffloadArch::SM_88:
23122313
case OffloadArch::SM_89:
23132314
case OffloadArch::SM_90:
23142315
case OffloadArch::SM_90a:
@@ -2318,6 +2319,8 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
23182319
case OffloadArch::SM_101a:
23192320
case OffloadArch::SM_103:
23202321
case OffloadArch::SM_103a:
2322+
case OffloadArch::SM_110:
2323+
case OffloadArch::SM_110a:
23212324
case OffloadArch::SM_120:
23222325
case OffloadArch::SM_120a:
23232326
case OffloadArch::SM_121:

clang/test/Misc/target-invalid-cpu-note/nvptx.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
// CHECK-SAME: {{^}}, sm_80
2424
// CHECK-SAME: {{^}}, sm_86
2525
// CHECK-SAME: {{^}}, sm_87
26+
// CHECK-SAME: {{^}}, sm_88
2627
// CHECK-SAME: {{^}}, sm_89
2728
// CHECK-SAME: {{^}}, sm_90
2829
// CHECK-SAME: {{^}}, sm_90a
@@ -32,6 +33,8 @@
3233
// CHECK-SAME: {{^}}, sm_101a
3334
// CHECK-SAME: {{^}}, sm_103
3435
// CHECK-SAME: {{^}}, sm_103a
36+
// CHECK-SAME: {{^}}, sm_110
37+
// CHECK-SAME: {{^}}, sm_110a
3538
// CHECK-SAME: {{^}}, sm_120
3639
// CHECK-SAME: {{^}}, sm_120a
3740
// CHECK-SAME: {{^}}, sm_121

0 commit comments

Comments
 (0)