{"train/loss": 4.7054, "train/grad_norm": 75.04949188232422, "train/learning_rate": 0.0, "train/epoch": 0.0, "train/global_step": 60, "_timestamp": 1712906454.5679297, "_runtime": 37.446648836135864, "_step": 3, "train_runtime": 48.8747, "train_samples_per_second": 2.455, "train_steps_per_second": 1.228, "total_flos": 19847210803200.0, "train_loss": 4.619660822550456, "_wandb": {"runtime": 36}} |