Qwen-Qwen1.5-7B-1719080668 / checkpoint-500 /trainer_state.json

Upload folder using huggingface_hub

3a246f3 verified 4 months ago

No virus

5.2 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 3.1595576619273302,
	"eval_steps": 500,
	"global_step": 500,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.1263823064770932,
	"grad_norm": 1.039925456047058,
	"learning_rate": 4e-05,
	"loss": 2.1917,
	"step": 20
	},
	{
	"epoch": 0.2527646129541864,
	"grad_norm": 1.250934362411499,
	"learning_rate": 8e-05,
	"loss": 1.9713,
	"step": 40
	},
	{
	"epoch": 0.3791469194312796,
	"grad_norm": 1.2668324708938599,
	"learning_rate": 0.00012,
	"loss": 1.7553,
	"step": 60
	},
	{
	"epoch": 0.5055292259083728,
	"grad_norm": 1.822996973991394,
	"learning_rate": 0.00016,
	"loss": 1.6959,
	"step": 80
	},
	{
	"epoch": 0.631911532385466,
	"grad_norm": 1.8614917993545532,
	"learning_rate": 0.0002,
	"loss": 1.5757,
	"step": 100
	},
	{
	"epoch": 0.7582938388625592,
	"grad_norm": 2.021389961242676,
	"learning_rate": 0.0001924812030075188,
	"loss": 1.5615,
	"step": 120
	},
	{
	"epoch": 0.8846761453396524,
	"grad_norm": 1.8619110584259033,
	"learning_rate": 0.0001849624060150376,
	"loss": 1.4426,
	"step": 140
	},
	{
	"epoch": 1.0110584518167456,
	"grad_norm": 1.8433139324188232,
	"learning_rate": 0.0001774436090225564,
	"loss": 1.4429,
	"step": 160
	},
	{
	"epoch": 1.1374407582938388,
	"grad_norm": 2.324719190597534,
	"learning_rate": 0.0001699248120300752,
	"loss": 1.2625,
	"step": 180
	},
	{
	"epoch": 1.263823064770932,
	"grad_norm": 1.7142627239227295,
	"learning_rate": 0.00016240601503759398,
	"loss": 1.2677,
	"step": 200
	},
	{
	"epoch": 1.3902053712480253,
	"grad_norm": 2.427729606628418,
	"learning_rate": 0.0001548872180451128,
	"loss": 1.248,
	"step": 220
	},
	{
	"epoch": 1.5165876777251186,
	"grad_norm": 2.2092034816741943,
	"learning_rate": 0.00014736842105263158,
	"loss": 1.1584,
	"step": 240
	},
	{
	"epoch": 1.6429699842022116,
	"grad_norm": 1.9174597263336182,
	"learning_rate": 0.0001398496240601504,
	"loss": 1.1811,
	"step": 260
	},
	{
	"epoch": 1.7693522906793049,
	"grad_norm": 2.3690454959869385,
	"learning_rate": 0.00013233082706766918,
	"loss": 1.1186,
	"step": 280
	},
	{
	"epoch": 1.8957345971563981,
	"grad_norm": 1.9550652503967285,
	"learning_rate": 0.00012481203007518797,
	"loss": 1.1149,
	"step": 300
	},
	{
	"epoch": 2.022116903633491,
	"grad_norm": 2.081315517425537,
	"learning_rate": 0.00011729323308270677,
	"loss": 1.0422,
	"step": 320
	},
	{
	"epoch": 2.1484992101105846,
	"grad_norm": 3.5079455375671387,
	"learning_rate": 0.00010977443609022557,
	"loss": 0.8674,
	"step": 340
	},
	{
	"epoch": 2.2748815165876777,
	"grad_norm": 2.9713032245635986,
	"learning_rate": 0.00010225563909774436,
	"loss": 0.8312,
	"step": 360
	},
	{
	"epoch": 2.401263823064771,
	"grad_norm": 3.1877429485321045,
	"learning_rate": 9.473684210526316e-05,
	"loss": 0.7915,
	"step": 380
	},
	{
	"epoch": 2.527646129541864,
	"grad_norm": 3.9969420433044434,
	"learning_rate": 8.721804511278195e-05,
	"loss": 0.8997,
	"step": 400
	},
	{
	"epoch": 2.654028436018957,
	"grad_norm": 3.038134813308716,
	"learning_rate": 7.969924812030075e-05,
	"loss": 0.7495,
	"step": 420
	},
	{
	"epoch": 2.7804107424960507,
	"grad_norm": 2.5654478073120117,
	"learning_rate": 7.218045112781955e-05,
	"loss": 0.7727,
	"step": 440
	},
	{
	"epoch": 2.9067930489731437,
	"grad_norm": 3.3609306812286377,
	"learning_rate": 6.466165413533834e-05,
	"loss": 0.8009,
	"step": 460
	},
	{
	"epoch": 3.0331753554502368,
	"grad_norm": 3.048936367034912,
	"learning_rate": 5.714285714285714e-05,
	"loss": 0.753,
	"step": 480
	},
	{
	"epoch": 3.1595576619273302,
	"grad_norm": 2.0320680141448975,
	"learning_rate": 4.9624060150375936e-05,
	"loss": 0.5853,
	"step": 500
	}
	],
	"logging_steps": 20,
	"max_steps": 632,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 4,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 1.8241099229184e+16,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}