Skip to content

Commit 3f99bcb

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web' (15 commits)
2 parents 8141058 + a848782 commit 3f99bcb

File tree

62 files changed

+2779
-898
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+2779
-898
lines changed

.github/workflows/sycl-ur-perf-benchmarking.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ on:
2424
- Full
2525
- SYCL
2626
- Minimal
27+
- Core
2728
- Normal
2829
- Test
2930
- Gromacs

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,6 +1141,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
11411141
MPM.addPass(SYCLPropagateJointMatrixUsagePass());
11421142
// Lowers static/dynamic local memory builtin calls.
11431143
MPM.addPass(SYCLLowerWGLocalMemoryPass());
1144+
// Compile-time properties pass must create standard metadata as early
1145+
// as possible to make them available for other passes.
1146+
MPM.addPass(CompileTimePropertiesPass());
11441147
});
11451148
else if (LangOpts.SYCLIsHost && !LangOpts.SYCLESIMDBuildHostCode)
11461149
PB.registerPipelineStartEPCallback(
@@ -1303,9 +1306,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
13031306
MPM.addPass(SPIRITTAnnotationsPass());
13041307
}
13051308

1306-
// Process properties and annotations
1307-
MPM.addPass(CompileTimePropertiesPass());
1308-
13091309
// Record SYCL aspect names (this should come after propagating aspects
13101310
// and before cleaning up metadata)
13111311
MPM.addPass(RecordSYCLAspectNamesPass());

clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
// CHECK: SYCLPropagateAspectsUsagePass
1010
// CHECK: SYCLPropagateJointMatrixUsagePass
1111
// CHECK: SYCLLowerWGLocalMemoryPass
12+
// CHECK: CompileTimePropertiesPass
1213
// CHECK: InferFunctionAttrsPass
1314
// CHECK: AlwaysInlinerPass
1415
// CHECK: ModuleInlinerWrapperPass
@@ -17,7 +18,6 @@
1718
// CHECK: SYCLMutatePrintfAddrspacePass
1819
// CHECK: SYCLPropagateAspectsUsagePass
1920
// CHECK: SYCLAddOptLevelAttributePass
20-
// CHECK: CompileTimePropertiesPass
2121
// CHECK: RecordSYCLAspectNamesPass
2222
// CHECK: CleanupSYCLMetadataPass
2323
//

clang/test/Driver/sycl-offload-new-driver.c renamed to clang/test/Driver/sycl-offload-new-driver.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
/// Verify --offload-new-driver option phases
33
// RUN: %clang --target=x86_64-unknown-linux-gnu -fsycl -fsycl-targets=nvptx64-nvidia-cuda,spir64 --offload-new-driver -ccc-print-phases %s 2>&1 \
44
// RUN: | FileCheck -check-prefix=OFFLOAD-NEW-DRIVER %s
5-
// OFFLOAD-NEW-DRIVER: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl)
5+
// OFFLOAD-NEW-DRIVER: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl)
66
// OFFLOAD-NEW_DRIVER: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
77
// OFFLOAD-NEW_DRIVER: 2: compiler, {1}, ir, (host-sycl)
88
// OFFLOAD-NEW_DRIVER: 3: input, "[[INPUT]]", c++, (device-sycl)
@@ -123,7 +123,7 @@
123123
// RUN: -fsycl-targets=intel_gpu_dg1,intel_gpu_pvc \
124124
// RUN: --offload-new-driver -ccc-print-phases %s 2>&1 \
125125
// RUN: | FileCheck -check-prefix=MULT_TARG_PHASES %s
126-
// MULT_TARG_PHASES: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl)
126+
// MULT_TARG_PHASES: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl)
127127
// MULT_TARG_PHASES: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
128128
// MULT_TARG_PHASES: 2: compiler, {1}, ir, (host-sycl)
129129
// MULT_TARG_PHASES: 3: input, "[[INPUT]]", c++, (device-sycl, dg1)

devops/scripts/benchmarks/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ The available benchmarks options are:
115115
* `Full` (BenchDNN, Compute, Gromacs, llama, SYCL, Velocity and UMF benchmarks)
116116
* `SYCL` (Compute, llama, SYCL, Velocity)
117117
* `Minimal` (Compute)
118+
* `Core` (Compute: SubmitKernel)
118119
* `Normal` (BenchDNN, Compute, Gromacs, llama, Velocity)
119120
* `Gromacs` (Gromacs)
120121
* `OneDNN` (BenchDNN)

devops/scripts/benchmarks/benches/compute.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,41 @@ def createRrBench(variant_name: str, **kwargs):
353353
return benches
354354

355355

356+
class ComputeBenchCoreSuite(ComputeBench):
357+
"""
358+
A suite for core compute benchmarks scenarios for quick runs.
359+
"""
360+
361+
def name(self) -> str:
362+
return "Compute Benchmarks Core"
363+
364+
def benchmarks(self) -> list[Benchmark]:
365+
core_benches = []
366+
submit_kernel_params = product(
367+
list(RUNTIMES),
368+
[0, 1], # in_order_queue
369+
[0, 1], # measure_completion
370+
[0, 1], # use_events
371+
)
372+
for (
373+
runtime,
374+
in_order_queue,
375+
measure_completion,
376+
use_events,
377+
) in submit_kernel_params:
378+
core_benches.append(
379+
SubmitKernel(
380+
self,
381+
runtime,
382+
in_order_queue,
383+
measure_completion,
384+
use_events,
385+
KernelExecTime=1,
386+
)
387+
)
388+
return core_benches
389+
390+
356391
class ComputeBenchmark(Benchmark):
357392
def __init__(
358393
self,

devops/scripts/benchmarks/compare.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ def print_regression(entry: dict, is_warning: bool = False):
411411
log_func(f"-- Delta: {entry['delta']}")
412412
log_func("")
413413
if args.produce_github_summary:
414-
gh_summary.append(f"#### {entry['name']}:")
414+
gh_summary.append(f"##### {entry['name']}:")
415415
gh_summary.append(
416416
f"- Historic {entry['avg_type']}: {entry['hist_avg']}"
417417
)
@@ -427,12 +427,16 @@ def print_regression(entry: dict, is_warning: bool = False):
427427
)
428428
gh_summary.append("")
429429

430+
if args.produce_github_summary:
431+
gh_summary.append("")
432+
gh_summary.append("### Regressions and Improvements")
433+
430434
if improvements:
431435
log.info("#")
432436
log.info("# Improvements:")
433437
log.info("#")
434438
if args.produce_github_summary:
435-
gh_summary.append(f"### Improvements")
439+
gh_summary.append(f"#### Improvements")
436440
gh_summary.append(
437441
f"<details><summary>{len(improvements)} improved tests:</summary>"
438442
)
@@ -444,12 +448,16 @@ def print_regression(entry: dict, is_warning: bool = False):
444448
gh_summary.append("")
445449
if regressions_ignored:
446450
log.info("#")
447-
log.info("# Regressions (filtered out by --regression-filter):")
451+
log.info(
452+
f"# Regressions Ignored (filtered out by --regression-filter: {filter_type_capitalized})"
453+
)
448454
log.info("#")
449455
if args.produce_github_summary:
450-
gh_summary.append(f"### Non-{filter_type_capitalized} Regressions")
451456
gh_summary.append(
452-
f"<details><summary>{len(regressions_ignored)} non-{args.regression_filter_type} regressions:</summary>"
457+
f"#### Regressions Ignored (filtered out by --regression-filter: {filter_type_capitalized})"
458+
)
459+
gh_summary.append(
460+
f"<details><summary>{len(regressions_ignored)} non-'{args.regression_filter_type}' regressions:</summary>"
453461
)
454462
gh_summary.append("")
455463
for test in regressions_ignored:
@@ -462,7 +470,7 @@ def print_regression(entry: dict, is_warning: bool = False):
462470
log.warning("# Regressions:")
463471
log.warning("#")
464472
if args.produce_github_summary:
465-
gh_summary.append(f"### {filter_type_capitalized} Regressions")
473+
gh_summary.append(f"#### {filter_type_capitalized} Regressions")
466474
gh_summary.append(
467475
f"{len(regressions_of_concern)} {args.regression_filter_type} regressions. These regressions warrant a CI failure:"
468476
)
@@ -480,8 +488,6 @@ def print_regression(entry: dict, is_warning: bool = False):
480488

481489
log.info("No unexpected regressions found!")
482490
if args.produce_github_summary:
483-
gh_summary.append("")
484-
gh_summary.append("### Regressions")
485491
gh_summary.append("No unexpected regressions found!")
486492
with open(options.github_summary_regression_filename, "w") as f:
487493
f.write("\n".join(gh_summary))

devops/scripts/benchmarks/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ def main(directory, additional_env_vars, compare_names, filter, execution_stats)
269269

270270
suites = [
271271
ComputeBench(),
272+
ComputeBenchCoreSuite(),
272273
VelocityBench(),
273274
SyclBench(),
274275
LlamaCppBench(),

devops/scripts/benchmarks/presets.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@
2626
"Minimal": [
2727
"Compute Benchmarks",
2828
],
29+
"Core": [
30+
"Compute Benchmarks Core",
31+
],
2932
"Normal": [
3033
"BenchDNN",
3134
"Compute Benchmarks",

libdevice/nativecpu_utils.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,14 @@ DefGenericCastToPtrExpl(ToGlobal, OCL_GLOBAL);
9595
DefSubgroupBlockINTEL_vt(Type, v8)
9696

9797
namespace ncpu_types {
98+
template <typename DataT, int NumElements>
99+
using native_vector_t =
100+
sycl::detail::ConvertToOpenCLType_t<sycl::vec<DataT, NumElements>>;
101+
98102
template <class T> struct vtypes {
99-
using v2 = typename sycl::vec<T, 2>::vector_t;
100-
using v4 = typename sycl::vec<T, 4>::vector_t;
101-
using v8 = typename sycl::vec<T, 8>::vector_t;
103+
using v2 = native_vector_t<T, 2>;
104+
using v4 = native_vector_t<T, 4>;
105+
using v8 = native_vector_t<T, 8>;
102106
};
103107
} // namespace ncpu_types
104108

@@ -224,15 +228,15 @@ DefineLogicalGroupOp(bool, bool, i1);
224228
} \
225229
\
226230
DEVICE_EXTERNAL Type __spirv_GroupBroadcast( \
227-
int32_t g, Type v, sycl::vec<IDType, 2>::vector_t l) noexcept { \
231+
int32_t g, Type v, ncpu_types::native_vector_t<IDType, 2> l) noexcept { \
228232
if (__spv::Scope::Flag::Subgroup == g) \
229233
return __mux_sub_group_broadcast_##Sfx(v, l[0]); \
230234
else \
231235
return __mux_work_group_broadcast_##Sfx(0, v, l[0], l[1], 0); \
232236
} \
233237
\
234238
DEVICE_EXTERNAL Type __spirv_GroupBroadcast( \
235-
int32_t g, Type v, sycl::vec<IDType, 3>::vector_t l) noexcept { \
239+
int32_t g, Type v, ncpu_types::native_vector_t<IDType, 3> l) noexcept { \
236240
if (__spv::Scope::Flag::Subgroup == g) \
237241
return __mux_sub_group_broadcast_##Sfx(v, l[0]); \
238242
else \
@@ -310,8 +314,8 @@ DefShuffleINTEL_All(float, f32, float);
310314
DefShuffleINTEL_All(_Float16, f16, _Float16);
311315

312316
#define DefineShuffleVec(T, N, Sfx, MuxType) \
313-
using vt##T##N = sycl::vec<T, N>::vector_t; \
314-
using vt##MuxType##N = sycl::vec<MuxType, N>::vector_t; \
317+
using vt##T##N = ncpu_types::native_vector_t<T, N>; \
318+
using vt##MuxType##N = ncpu_types::native_vector_t<MuxType, N>; \
315319
DefShuffleINTEL_All(vt##T##N, v##N##Sfx, vt##MuxType##N)
316320

317321
#define DefineShuffleVec2to16(Type, Sfx, MuxType) \

0 commit comments

Comments
 (0)