{
    "train_micro_batch_size_per_gpu": 1,
    "gradient_accumulation_steps": 2,
    "gradient_clipping": 1.0,
    "steps_per_print": 1
}