@@ -217,25 +217,26 @@ def _step():
217217 delta_fwd = _step ()
218218 total_step += delta_fwd
219219 num_samples += self .batch_size
220- if (i + 1 ) % self .log_freq == 0 :
220+ num_steps = i + 1
221+ if num_steps % self .log_freq == 0 :
221222 _logger .info (
222- f"Infer [{ i + 1 } /{ self .num_bench_iter } ]."
223+ f"Infer [{ num_steps } /{ self .num_bench_iter } ]."
223224 f" { num_samples / total_step :0.2f} samples/sec."
224- f" { 1000 * total_step / num_samples :0.3f} ms/sample ." )
225+ f" { 1000 * total_step / num_steps :0.3f} ms/step ." )
225226 t_run_end = self .time_fn (True )
226227 t_run_elapsed = t_run_end - t_run_start
227228
228229 results = dict (
229230 samples_per_sec = round (num_samples / t_run_elapsed , 2 ),
230- step_time = round (1000 * total_step / num_samples , 3 ),
231+ step_time = round (1000 * total_step / self . num_bench_iter , 3 ),
231232 batch_size = self .batch_size ,
232233 img_size = self .input_size [- 1 ],
233234 param_count = round (self .param_count / 1e6 , 2 ),
234235 )
235236
236237 _logger .info (
237238 f"Inference benchmark of { self .model_name } done. "
238- f"{ results ['samples_per_sec' ]:.2f} samples/sec, { results ['step_time' ]:.2f} ms/sample " )
239+ f"{ results ['samples_per_sec' ]:.2f} samples/sec, { results ['step_time' ]:.2f} ms/step " )
239240
240241 return results
241242
@@ -254,8 +255,8 @@ def __init__(self, model_name, device='cuda', torchscript=False, **kwargs):
254255
255256 self .optimizer = create_optimizer_v2 (
256257 self .model ,
257- opt_name = kwargs .pop ('opt' , 'sgd' ),
258- lr = kwargs .pop ('lr' , 1e-4 ))
258+ optimizer_name = kwargs .pop ('opt' , 'sgd' ),
259+ learning_rate = kwargs .pop ('lr' , 1e-4 ))
259260
260261 def _gen_target (self , batch_size ):
261262 return torch .empty (
@@ -309,23 +310,24 @@ def _step(detail=False):
309310 total_fwd += delta_fwd
310311 total_bwd += delta_bwd
311312 total_opt += delta_opt
312- if (i + 1 ) % self .log_freq == 0 :
313+ num_steps = (i + 1 )
314+ if num_steps % self .log_freq == 0 :
313315 total_step = total_fwd + total_bwd + total_opt
314316 _logger .info (
315- f"Train [{ i + 1 } /{ self .num_bench_iter } ]."
317+ f"Train [{ num_steps } /{ self .num_bench_iter } ]."
316318 f" { num_samples / total_step :0.2f} samples/sec."
317- f" { 1000 * total_fwd / num_samples :0.3f} ms/sample fwd,"
318- f" { 1000 * total_bwd / num_samples :0.3f} ms/sample bwd,"
319- f" { 1000 * total_opt / num_samples :0.3f} ms/sample opt."
319+ f" { 1000 * total_fwd / num_steps :0.3f} ms/step fwd,"
320+ f" { 1000 * total_bwd / num_steps :0.3f} ms/step bwd,"
321+ f" { 1000 * total_opt / num_steps :0.3f} ms/step opt."
320322 )
321323 total_step = total_fwd + total_bwd + total_opt
322324 t_run_elapsed = self .time_fn () - t_run_start
323325 results = dict (
324326 samples_per_sec = round (num_samples / t_run_elapsed , 2 ),
325- step_time = round (1000 * total_step / num_samples , 3 ),
326- fwd_time = round (1000 * total_fwd / num_samples , 3 ),
327- bwd_time = round (1000 * total_bwd / num_samples , 3 ),
328- opt_time = round (1000 * total_opt / num_samples , 3 ),
327+ step_time = round (1000 * total_step / self . num_bench_iter , 3 ),
328+ fwd_time = round (1000 * total_fwd / self . num_bench_iter , 3 ),
329+ bwd_time = round (1000 * total_bwd / self . num_bench_iter , 3 ),
330+ opt_time = round (1000 * total_opt / self . num_bench_iter , 3 ),
329331 batch_size = self .batch_size ,
330332 img_size = self .input_size [- 1 ],
331333 param_count = round (self .param_count / 1e6 , 2 ),
@@ -337,15 +339,16 @@ def _step(detail=False):
337339 delta_step = _step (False )
338340 num_samples += self .batch_size
339341 total_step += delta_step
340- if (i + 1 ) % self .log_freq == 0 :
342+ num_steps = (i + 1 )
343+ if num_steps % self .log_freq == 0 :
341344 _logger .info (
342- f"Train [{ i + 1 } /{ self .num_bench_iter } ]."
345+ f"Train [{ num_steps } /{ self .num_bench_iter } ]."
343346 f" { num_samples / total_step :0.2f} samples/sec."
344- f" { 1000 * total_step / num_samples :0.3f} ms/sample ." )
347+ f" { 1000 * total_step / num_steps :0.3f} ms/step ." )
345348 t_run_elapsed = self .time_fn () - t_run_start
346349 results = dict (
347350 samples_per_sec = round (num_samples / t_run_elapsed , 2 ),
348- step_time = round (1000 * total_step / num_samples , 3 ),
351+ step_time = round (1000 * total_step / self . num_bench_iter , 3 ),
349352 batch_size = self .batch_size ,
350353 img_size = self .input_size [- 1 ],
351354 param_count = round (self .param_count / 1e6 , 2 ),
0 commit comments