{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.15252816677093506,
"min": 0.14906516671180725,
"max": 1.414180874824524,
"count": 100
},
"Pyramids.Policy.Entropy.sum": {
"value": 4588.04736328125,
"min": 4464.7998046875,
"max": 42900.58984375,
"count": 100
},
"Pyramids.Step.mean": {
"value": 2999890.0,
"min": 29952.0,
"max": 2999890.0,
"count": 100
},
"Pyramids.Step.sum": {
"value": 2999890.0,
"min": 29952.0,
"max": 2999890.0,
"count": 100
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.8182879686355591,
"min": -0.1800912469625473,
"max": 0.8639610409736633,
"count": 100
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 252.85098266601562,
"min": -42.68162536621094,
"max": 264.73980712890625,
"count": 100
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.01633444055914879,
"min": -0.01727745309472084,
"max": 0.46886947751045227,
"count": 100
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 5.047342300415039,
"min": -4.9931840896606445,
"max": 111.12206268310547,
"count": 100
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06845543435645393,
"min": 0.06536668095607585,
"max": 0.07368367805766238,
"count": 100
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 1.026831515346809,
"min": 0.49879954672351967,
"max": 1.092246385746608,
"count": 100
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.016203644212024904,
"min": 0.0006459115332719983,
"max": 0.0173887597076257,
"count": 100
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.24305466318037358,
"min": 0.009042761465807975,
"max": 0.2443290390291105,
"count": 100
},
"Pyramids.Policy.LearningRate.mean": {
"value": 1.5214728262088886e-06,
"min": 1.5214728262088886e-06,
"max": 0.00029838354339596195,
"count": 100
},
"Pyramids.Policy.LearningRate.sum": {
"value": 2.282209239313333e-05,
"min": 2.282209239313333e-05,
"max": 0.0039690933769689005,
"count": 100
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10050712444444444,
"min": 0.10050712444444444,
"max": 0.19946118095238097,
"count": 100
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.5076068666666667,
"min": 1.3962282666666668,
"max": 2.767539433333334,
"count": 100
},
"Pyramids.Policy.Beta.mean": {
"value": 6.066173200000002e-05,
"min": 6.066173200000002e-05,
"max": 0.009946171977142856,
"count": 100
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0009099259800000002,
"min": 0.0009099259800000002,
"max": 0.13231080689000002,
"count": 100
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.00619171280413866,
"min": 0.006113977171480656,
"max": 0.5689794421195984,
"count": 100
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.09287568926811218,
"min": 0.08559568226337433,
"max": 3.982856035232544,
"count": 100
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 211.73333333333332,
"min": 210.13138686131387,
"max": 999.0,
"count": 100
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 31760.0,
"min": 15984.0,
"max": 33377.0,
"count": 100
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.73392078820491,
"min": -1.0000000521540642,
"max": 1.7871792959755866,
"count": 100
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 258.3541974425316,
"min": -30.354801654815674,
"max": 259.14099791646004,
"count": 100
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.73392078820491,
"min": -1.0000000521540642,
"max": 1.7871792959755866,
"count": 100
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 258.3541974425316,
"min": -30.354801654815674,
"max": 259.14099791646004,
"count": 100
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.013677320852500681,
"min": 0.013677320852500681,
"max": 11.693298460915685,
"count": 100
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.0379208070226014,
"min": 1.8333403850265313,
"max": 187.09277537465096,
"count": 100
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1705560733",
"python_version": "3.10.12 (main, Jan 18 2024, 15:14:31) [GCC 11.4.0]",
"command_line_arguments": "/home/omar/.pyenv/versions/3.10.12/envs/rlhf/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.1.2+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1705562784"
},
"total": 2051.1559370680006,
"count": 1,
"self": 0.16801876400131732,
"children": {
"run_training.setup": {
"total": 0.010624038000059954,
"count": 1,
"self": 0.010624038000059954
},
"TrainerController.start_learning": {
"total": 2050.9772942659993,
"count": 1,
"self": 1.6147928751834115,
"children": {
"TrainerController._reset_env": {
"total": 0.6259975359998862,
"count": 1,
"self": 0.6259975359998862
},
"TrainerController.advance": {
"total": 2048.6991612388165,
"count": 195036,
"self": 1.4775207274569766,
"children": {
"env_step": {
"total": 1287.8277061731478,
"count": 195036,
"self": 1166.8335389201993,
"children": {
"SubprocessEnvManager._take_step": {
"total": 119.98410006001086,
"count": 195036,
"self": 5.1350317988071765,
"children": {
"TorchPolicy.evaluate": {
"total": 114.84906826120368,
"count": 187567,
"self": 114.84906826120368
}
}
},
"workers": {
"total": 1.0100671929376404,
"count": 195036,
"self": 0.0,
"children": {
"worker_root": {
"total": 2048.7941494200104,
"count": 195036,
"is_parallel": true,
"self": 995.055227156945,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.000967019000199798,
"count": 1,
"is_parallel": true,
"self": 0.00026698600140662165,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0007000329987931764,
"count": 8,
"is_parallel": true,
"self": 0.0007000329987931764
}
}
},
"UnityEnvironment.step": {
"total": 0.01505296899995301,
"count": 1,
"is_parallel": true,
"self": 0.0001391280002280837,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00020458800008782418,
"count": 1,
"is_parallel": true,
"self": 0.00020458800008782418
},
"communicator.exchange": {
"total": 0.01417733899961604,
"count": 1,
"is_parallel": true,
"self": 0.01417733899961604
},
"steps_from_proto": {
"total": 0.0005319140000210609,
"count": 1,
"is_parallel": true,
"self": 0.00015708800128777511,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0003748259987332858,
"count": 8,
"is_parallel": true,
"self": 0.0003748259987332858
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1053.7389222630654,
"count": 195035,
"is_parallel": true,
"self": 20.99056299007316,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 17.357348764071503,
"count": 195035,
"is_parallel": true,
"self": 17.357348764071503
},
"communicator.exchange": {
"total": 947.2814714219066,
"count": 195035,
"is_parallel": true,
"self": 947.2814714219066
},
"steps_from_proto": {
"total": 68.10953908701413,
"count": 195035,
"is_parallel": true,
"self": 14.092228908545621,
"children": {
"_process_rank_one_or_two_observation": {
"total": 54.017310178468506,
"count": 1560280,
"is_parallel": true,
"self": 54.017310178468506
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 759.3939343382117,
"count": 195036,
"self": 3.448474896033076,
"children": {
"process_trajectory": {
"total": 163.58176201317292,
"count": 195036,
"self": 163.32704501017452,
"children": {
"RLTrainer._checkpoint": {
"total": 0.2547170029984045,
"count": 6,
"self": 0.2547170029984045
}
}
},
"_update_policy": {
"total": 592.3636974290057,
"count": 1398,
"self": 391.9358900801535,
"children": {
"TorchPPOOptimizer.update": {
"total": 200.42780734885218,
"count": 68391,
"self": 200.42780734885218
}
}
}
}
}
}
},
"trainer_threads": {
"total": 5.800002327305265e-07,
"count": 1,
"self": 5.800002327305265e-07
},
"TrainerController._save_models": {
"total": 0.03734203599924513,
"count": 1,
"self": 0.0009203109993904945,
"children": {
"RLTrainer._checkpoint": {
"total": 0.03642172499985463,
"count": 1,
"self": 0.03642172499985463
}
}
}
}
}
}
}