{
  "train_micro_batch_size_per_gpu": 1,
  "gradient_accumulation_steps": 2,
  "gradient_clipping": 1.0,
  "steps_per_print": 1
}