|
19 | 19 | } |
20 | 20 | }, |
21 | 21 | { |
22 | | - "test_name": "serving_qwen3_30b_a3b_tp8_random_in1k_out2k", |
| 22 | + "test_name": "serving_gemma_3_27b_it_tp8_random_in1k_out2k", |
23 | 23 | "qps_list": [10], |
24 | 24 | "server_parameters": { |
25 | | - "model": "Qwen/Qwen3-30B-A3B", |
| 25 | + "model": "google/gemma-3-27b-it", |
26 | 26 | "tensor_parallel_size": 8, |
27 | 27 | "swap_space": 16, |
28 | 28 | "disable_log_stats": "", |
29 | 29 | "disable_log_requests": "", |
30 | 30 | "load_format": "dummy", |
31 | | - "max_model_len": 8192 |
| 31 | + "context_length": 8192 |
32 | 32 | }, |
33 | 33 | "client_parameters": { |
34 | | - "model": "Qwen/Qwen3-30B-A3B", |
| 34 | + "model": "google/gemma-3-27b-it", |
35 | 35 | "backend": "vllm", |
36 | 36 | "dataset_name": "random", |
37 | 37 | "num_prompts": 200, |
|
40 | 40 | } |
41 | 41 | }, |
42 | 42 | { |
43 | | - "test_name": "serving_gemma_3_27b_it_tp8_random_in1k_out2k", |
| 43 | + "test_name": "serving_gemma_3_4b_it_tp1_random_in1k_out2k", |
44 | 44 | "qps_list": [10], |
45 | 45 | "server_parameters": { |
46 | | - "model": "google/gemma-3-27b-it", |
47 | | - "tensor_parallel_size": 8, |
| 46 | + "model": "google/gemma-3-4b-it", |
| 47 | + "tensor_parallel_size": 1, |
48 | 48 | "swap_space": 16, |
49 | 49 | "disable_log_stats": "", |
50 | 50 | "disable_log_requests": "", |
51 | 51 | "load_format": "dummy", |
52 | | - "max_model_len": 8192 |
| 52 | + "context_length": 8192 |
53 | 53 | }, |
54 | 54 | "client_parameters": { |
55 | | - "model": "google/gemma-3-27b-it", |
| 55 | + "model": "google/gemma-3-4b-it", |
56 | 56 | "backend": "vllm", |
57 | 57 | "dataset_name": "random", |
58 | 58 | "num_prompts": 200, |
|
61 | 61 | } |
62 | 62 | }, |
63 | 63 | { |
64 | | - "test_name": "serving_gemma_3_4b_it_tp1_random_in1k_out2k", |
65 | | - "qps_list": [10], |
| 64 | + "test_name": "serving_opt125m_tp1_sharegpt", |
| 65 | + "qps_list": [1, 4, 16, "inf"], |
66 | 66 | "server_parameters": { |
67 | | - "model": "google/gemma-3-4b-it", |
| 67 | + "model": "facebook/opt-125m", |
68 | 68 | "tensor_parallel_size": 1, |
69 | 69 | "swap_space": 16, |
70 | 70 | "disable_log_stats": "", |
71 | 71 | "disable_log_requests": "", |
72 | 72 | "load_format": "dummy", |
73 | | - "max_model_len": 8192 |
| 73 | + "context_length": 2048 |
74 | 74 | }, |
75 | 75 | "client_parameters": { |
76 | | - "model": "google/gemma-3-4b-it", |
| 76 | + "model": "facebook/opt-125m", |
77 | 77 | "backend": "vllm", |
78 | | - "dataset_name": "random", |
79 | | - "num_prompts": 200, |
80 | | - "random_input_len": 1024, |
81 | | - "random_output_len": 2048 |
| 78 | + "dataset_name": "sharegpt", |
| 79 | + "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json", |
| 80 | + "num_prompts": 200 |
82 | 81 | } |
83 | 82 | }, |
84 | 83 | { |
85 | | - "test_name": "serving_qwen3_8b_tp1_random_in1k_out2k", |
86 | | - "qps_list": [10], |
| 84 | + "test_name": "serving_opt125m_tp1_random_in750_out75", |
| 85 | + "qps_list": [1, 4, 16, "inf"], |
87 | 86 | "server_parameters": { |
88 | | - "model": "Qwen/Qwen3-8B", |
| 87 | + "model": "facebook/opt-125m", |
89 | 88 | "tensor_parallel_size": 1, |
90 | 89 | "swap_space": 16, |
91 | 90 | "disable_log_stats": "", |
92 | 91 | "disable_log_requests": "", |
93 | 92 | "load_format": "dummy", |
94 | | - "max_model_len": 8192 |
| 93 | + "context_length": 2048 |
95 | 94 | }, |
96 | 95 | "client_parameters": { |
97 | | - "model": "Qwen/Qwen3-8B", |
| 96 | + "model": "facebook/opt-125m", |
98 | 97 | "backend": "vllm", |
99 | 98 | "dataset_name": "random", |
100 | 99 | "num_prompts": 200, |
101 | | - "random_input_len": 1024, |
102 | | - "random_output_len": 2048 |
| 100 | + "random_input_len": 750, |
| 101 | + "random_output_len": 75 |
103 | 102 | } |
104 | 103 | } |
105 | 104 | ] |
0 commit comments