Commit 037e5e6

Fix #1309, move wandb init after distributed init, only init on rank == 0 process
1 parent 9e12530 commit 037e5e6

Showing 1 changed file with 7 additions and 7 deletions.

train.py

Lines changed: 7 additions & 7 deletions
@@ -347,13 +347,6 @@ def main():
     utils.setup_default_logging()
     args, args_text = _parse_args()
 
-    if args.log_wandb:
-        if has_wandb:
-            wandb.init(project=args.experiment, config=args)
-        else:
-            _logger.warning("You've requested to log metrics to wandb but package not found. "
-                            "Metrics not being logged to wandb, try `pip install wandb`")
-
     args.prefetcher = not args.no_prefetcher
     args.distributed = False
     if 'WORLD_SIZE' in os.environ:
@@ -373,6 +366,13 @@ def main():
         _logger.info('Training with a single process on 1 GPUs.')
     assert args.rank >= 0
 
+    if args.rank == 0 and args.log_wandb:
+        if has_wandb:
+            wandb.init(project=args.experiment, config=args)
+        else:
+            _logger.warning("You've requested to log metrics to wandb but package not found. "
+                            "Metrics not being logged to wandb, try `pip install wandb`")
+
     # resolve AMP arguments based on PyTorch / Apex availability
     use_amp = None
     if args.amp:
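
For context, here is a minimal standalone sketch of the pattern this commit adopts: set up the distributed process group first, then create the wandb run only on the rank-0 process so a multi-GPU job logs as a single experiment. The NCCL backend, env:// init method, and project name below are illustrative assumptions, not taken from train.py.

import os

import torch.distributed as dist

# wandb is optional; mirror the guarded import used in train.py.
try:
    import wandb
    has_wandb = True
except ImportError:
    has_wandb = False


def main():
    # Rank defaults to 0 for single-process runs.
    rank = 0
    if int(os.environ.get('WORLD_SIZE', 1)) > 1:
        # Initialize distributed training first so the process rank is
        # known before any logging side effects happen. Assumes a launcher
        # like torchrun has set MASTER_ADDR/MASTER_PORT/RANK/WORLD_SIZE.
        dist.init_process_group(backend='nccl', init_method='env://')
        rank = dist.get_rank()

    # Only rank 0 talks to wandb; the other ranks stay silent, so one
    # multi-GPU job shows up as a single experiment run.
    if rank == 0 and has_wandb:
        wandb.init(project='my-experiment')  # hypothetical project name


if __name__ == '__main__':
    main()

Initializing wandb before the process group (as train.py did prior to this commit) would start one run per process, which is exactly the duplicate-logging behavior reported in #1309.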

0 commit comments