From e29df9b48b65d2beeca41de54cdd9fc5dbef7f7e Mon Sep 17 00:00:00 2001
From: jainapurva
Date: Thu, 4 Dec 2025 19:23:24 +0000
Subject: [PATCH] Add relevant shapes to microbenchmarks

---
 .../microbenchmark_quantization_config.yml   |  8 +--
 .../microbenchmarks/benchmark_runner.py      | 49 ++++++++++++++++++-
 2 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/benchmarks/dashboard/microbenchmark_quantization_config.yml b/benchmarks/dashboard/microbenchmark_quantization_config.yml
index 4483112fa1..d3e57647d4 100644
--- a/benchmarks/dashboard/microbenchmark_quantization_config.yml
+++ b/benchmarks/dashboard/microbenchmark_quantization_config.yml
@@ -10,9 +10,11 @@ output_dir: "benchmarks/microbenchmarks/results"
 model_params:
   - name: "small_bf16_linear"
     matrix_shapes:
-      - name: "small_sweep"
-        min_power: 10
-        max_power: 15
+      - name: "llama4"
+      - name: "deepseek_v3_236b"
+      - name: "deepseek_v3_671b"
+      - name: "qwen3_32b"
+      - name: "gemma3_27b"
     high_precision_dtype: "torch.bfloat16"
     torch_compile_mode: "max-autotune"
     device: "cuda"
diff --git a/benchmarks/microbenchmarks/benchmark_runner.py b/benchmarks/microbenchmarks/benchmark_runner.py
index 45a0534ee0..51f5d8182f 100644
--- a/benchmarks/microbenchmarks/benchmark_runner.py
+++ b/benchmarks/microbenchmarks/benchmark_runner.py
@@ -60,6 +60,53 @@ def get_shapes_for_config(
                 "ffn.w2": (M, 3584, 8192),
             }
             shapes.extend([(f"{name}_{k}", v) for k, v in llama_shapes.items()])
+        elif name == "llama4":
+            # LLaMa 4 shapes
+            llama4_shapes = [
+                ("FFN", (16384, 8192, 5120)),
+                ("QO_proj", (16384, 8192, 8192)),
+                ("KV_proj", (16384, 8192, 1024)),
+                ("FFN", (128000, 8192, 5120)),
+                ("QO_proj", (128000, 8192, 8192)),
+                ("KV_proj", (128000, 8192, 1024)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in llama4_shapes])
+        elif name == "deepseek_v3_236b":
+            # DeepSeek V3 236B shapes
+            deepseek_v3_236b_shapes = [
+                ("FFN", (16384, 1536, 5120)),
+                ("QKVO_proj", (16384, 7168, 7168)),
+                ("FFN", (128000, 1536, 5120)),
+                ("QKVO_proj", (128000, 7168, 7168)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in deepseek_v3_236b_shapes])
+        elif name == "deepseek_v3_671b":
+            # DeepSeek V3 671B shapes
+            deepseek_v3_671b_shapes = [
+                ("FFN", (16384, 2048, 7168)),
+                ("QKVO_proj", (16384, 7168, 7168)),
+                ("FFN", (128000, 2048, 7168)),
+                ("QKVO_proj", (128000, 7168, 7168)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in deepseek_v3_671b_shapes])
+        elif name == "qwen3_32b":
+            # Qwen3 32B shapes
+            qwen3_32b_shapes = [
+                ("QO_proj", (16384, 5120, 5120)),
+                ("KV_proj", (16384, 5120, 640)),
+                ("QO_proj", (128000, 5120, 5120)),
+                ("KV_proj", (128000, 5120, 640)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in qwen3_32b_shapes])
+        elif name == "gemma3_27b":
+            # Gemma3 27B shapes
+            gemma3_27b_shapes = [
+                ("QO_proj", (16384, 4096, 4096)),
+                ("KV_proj", (16384, 4096, 1024)),
+                ("QO_proj", (128000, 4096, 4096)),
+                ("KV_proj", (128000, 4096, 1024)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in gemma3_27b_shapes])
         elif name == "pow2":
             # Generate shapes with dimensions that are powers of 2
             min_power_of_2 = shape_config.get("min_power", 10)  # 1024
@@ -105,7 +152,7 @@ def get_shapes_for_config(
                         counter += 1
         else:
             raise NotImplementedError(
-                f"Shape config {name} not supported. Supported options: custom, llama, pow2, pow2_extended, sweep."
+                f"Shape config {name} not supported. Supported options: custom, llama, llama4, deepseek_v3_236b, deepseek_v3_671b, qwen3_32b, gemma3_27b, pow2, pow2_extended, sweep."
             )

     return shapes
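
For a quick sanity check of what the new branches expand to, the standalone sketch below mirrors the per-model lookup without applying the patch. It is illustrative only: the dict layout and the helper name expand_model_shapes are inventions of this note, the real logic lives in the elif chain inside get_shapes_for_config, and only the qwen3_32b and gemma3_27b entries from the patch are reproduced here.

    from typing import Dict, List, Tuple

    # Illustrative stand-in for the new elif branches; the (M, K, N)
    # values are copied verbatim from the patch above.
    MODEL_SHAPES: Dict[str, List[Tuple[str, Tuple[int, int, int]]]] = {
        "qwen3_32b": [
            ("QO_proj", (16384, 5120, 5120)),
            ("KV_proj", (16384, 5120, 640)),
            ("QO_proj", (128000, 5120, 5120)),
            ("KV_proj", (128000, 5120, 640)),
        ],
        "gemma3_27b": [
            ("QO_proj", (16384, 4096, 4096)),
            ("KV_proj", (16384, 4096, 1024)),
            ("QO_proj", (128000, 4096, 4096)),
            ("KV_proj", (128000, 4096, 1024)),
        ],
    }

    def expand_model_shapes(name: str) -> List[Tuple[str, Tuple[int, int, int]]]:
        # Same naming step as the patch: prefix each layer label with the
        # config name, e.g. ("qwen3_32b_QO_proj", (16384, 5120, 5120)).
        if name not in MODEL_SHAPES:
            raise NotImplementedError(f"Shape config {name} not supported.")
        return [(f"{name}_{k}", v) for k, v in MODEL_SHAPES[name]]

    if __name__ == "__main__":
        for shape_name, (m, k, n) in expand_model_shapes("qwen3_32b"):
            print(f"{shape_name}: M={m}, K={k}, N={n}")

One thing the sketch makes visible: because each layer label appears once per M value (16384 and 128000), the expanded benchmark names collide, e.g. two qwen3_32b_QO_proj entries. If any downstream reporting keys on the shape name alone, folding the M value into the label may be worth considering.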
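
The new named shape sets can also be mixed with ad-hoc sizes in the same matrix_shapes list. The variant below is hypothetical and not part of the patch; the custom/shapes keys are the ones the existing "custom" branch of get_shapes_for_config already consumes, but the exact layout should be double-checked against the microbenchmarks docs before use.

    model_params:
      - name: "small_bf16_linear"
        matrix_shapes:
          - name: "llama4"
          - name: "custom"
            shapes: [
              [1024, 8192, 5120]  # one ad-hoc (M, K, N) triple
            ]
        high_precision_dtype: "torch.bfloat16"
        torch_compile_mode: "max-autotune"
        device: "cuda"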