Skip to content

Commit 644dd73

Browse files
authored
Add gpt-oss to vLLM dashboard (#74)
* Add gpt-oss to vLLM dashboard

  Signed-off-by: Huy Do <huydhn@gmail.com>

* Typo

  Signed-off-by: Huy Do <huydhn@gmail.com>

* Default to h100

  Signed-off-by: Huy Do <huydhn@gmail.com>

* Typo

  Signed-off-by: Huy Do <huydhn@gmail.com>

* Set input and output lengths

  Signed-off-by: Huy Do <huydhn@gmail.com>

* Another tweak

  Signed-off-by: Huy Do <huydhn@gmail.com>

* Another tweak

  Signed-off-by: Huy Do <huydhn@gmail.com>

---------

Signed-off-by: Huy Do <huydhn@gmail.com>
1 parent 4a7f7a2 commit 644dd73

File tree

4 files changed

+87
-1
lines changed

4 files changed

+87
-1
lines changed

.github/workflows/vllm-benchmark.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ jobs:
5353
shell: bash
5454
env:
5555
MODELS: ${{ inputs.models || '' }}
56-
RUNNERS: ${{ inputs.runners || '' }}
56+
RUNNERS: ${{ inputs.runners || 'h100' }}
5757
run: |
5858
set -eux
5959

vllm-benchmarks/benchmarks/cuda/latency-tests.json

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,5 +50,27 @@
5050
"num_iters": 15,
5151
"max_model_len": 8192
5252
}
53+
},
54+
{
55+
"test_name": "latency_gpt_oss_20b_tp1",
56+
"parameters": {
57+
"model": "openai/gpt-oss-20b",
58+
"tensor_parallel_size": 1,
59+
"load_format": "dummy",
60+
"num_iters_warmup": 5,
61+
"num_iters": 15,
62+
"max_model_len": 8192
63+
}
64+
},
65+
{
66+
"test_name": "latency_gpt_oss_120b_tp4",
67+
"parameters": {
68+
"model": "openai/gpt-oss-120b",
69+
"tensor_parallel_size": 4,
70+
"load_format": "dummy",
71+
"num_iters_warmup": 5,
72+
"num_iters": 15,
73+
"max_model_len": 8192
74+
}
5375
}
5476
]

vllm-benchmarks/benchmarks/cuda/serving-tests.json

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -411,5 +411,45 @@
411411
"random_input_len": 30720,
412412
"random_output_len": 100
413413
}
414+
},
415+
{
416+
"test_name": "serving_gpt_oss_20b_tp1_random_in5k_out8k",
417+
"qps_list": [1, 4, 16, "inf"],
418+
"server_parameters": {
419+
"model": "openai/gpt-oss-20b",
420+
"tensor_parallel_size": 1,
421+
"swap_space": 16,
422+
"disable_log_stats": "",
423+
"disable_log_requests": "",
424+
"load_format": "dummy"
425+
},
426+
"client_parameters": {
427+
"model": "openai/gpt-oss-20b",
428+
"backend": "vllm",
429+
"dataset_name": "random",
430+
"num_prompts": 200,
431+
"random_input_len": 5250,
432+
"random_output_len": 8250
433+
}
434+
},
435+
{
436+
"test_name": "serving_gpt_oss_120b_tp4_random_in5k_out8k",
437+
"qps_list": [1, 4, 16, "inf"],
438+
"server_parameters": {
439+
"model": "openai/gpt-oss-120b",
440+
"tensor_parallel_size": 4,
441+
"swap_space": 16,
442+
"disable_log_stats": "",
443+
"disable_log_requests": "",
444+
"load_format": "dummy"
445+
},
446+
"client_parameters": {
447+
"model": "openai/gpt-oss-120b",
448+
"backend": "vllm",
449+
"dataset_name": "random",
450+
"num_prompts": 200,
451+
"random_input_len": 5250,
452+
"random_output_len": 8250
453+
}
414454
}
415455
]

vllm-benchmarks/benchmarks/cuda/throughput-tests.json

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,5 +55,29 @@
5555
"backend": "vllm",
5656
"max_model_len": 8192
5757
}
58+
},
59+
{
60+
"test_name": "throughput_gpt_oss_20b_tp1",
61+
"parameters": {
62+
"model": "openai/gpt-oss-20b",
63+
"tensor_parallel_size": 1,
64+
"load_format": "dummy",
65+
"dataset": "./ShareGPT_V3_unfiltered_cleaned_split.json",
66+
"num_prompts": 200,
67+
"backend": "vllm",
68+
"max_model_len": 8192
69+
}
70+
},
71+
{
72+
"test_name": "throughput_gpt_oss_120b_tp4",
73+
"parameters": {
74+
"model": "openai/gpt-oss-120b",
75+
"tensor_parallel_size": 4,
76+
"load_format": "dummy",
77+
"dataset": "./ShareGPT_V3_unfiltered_cleaned_split.json",
78+
"num_prompts": 200,
79+
"backend": "vllm",
80+
"max_model_len": 8192
81+
}
5882
}
5983
]

0 commit comments

Comments
 (0)